Skip to content

Commit

Permalink
v1.4.1 (#1568)
Browse files (browse the repository at this point in the history)
  • Loading branch information
OlivierDehaene authored Feb 16, 2024
1 parent 0f2daad commit 4139054
Show file tree
Hide file tree
Showing 13 changed files with 1,302 additions and 763 deletions.
303 changes: 176 additions & 127 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ members = [
resolver = "2"

[workspace.package]
version = "1.4.0"
version = "1.4.1"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements_cuda.txt && \
pip install ".[bnb, accelerate, quantize, peft]" --no-cache-dir
pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir

# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_amd
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements_rocm.txt && \
pip install ".[accelerate, peft]" --no-cache-dir
pip install ".[accelerate, peft, outlines]" --no-cache-dir

# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
Expand Down
40 changes: 35 additions & 5 deletions docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
"version": "1.4.0"
"version": "1.4.1"
},
"paths": {
"/": {
Expand Down Expand Up @@ -590,8 +590,11 @@
"minimum": 0
},
"logprobs": {
"type": "number",
"format": "float",
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionLogprobs"
}
],
"nullable": true
}
}
Expand Down Expand Up @@ -710,7 +713,7 @@
"presence_penalty": {
"type": "number",
"format": "float",
"description": "UNUSED\nNumber between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
"example": 0.1,
"nullable": true
},
Expand All @@ -734,7 +737,7 @@
"top_logprobs": {
"type": "integer",
"format": "int32",
"description": "UNUSED\nAn integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
"description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
"example": "5",
"nullable": true,
"minimum": 0
Expand Down Expand Up @@ -870,6 +873,22 @@
"default": "false",
"example": true
},
"frequency_penalty": {
"type": "number",
"format": "float",
"default": "null",
"example": 0.1,
"nullable": true,
"exclusiveMinimum": -2
},
"grammar": {
"allOf": [
{
"$ref": "#/components/schemas/GrammarType"
}
],
"nullable": true
},
"max_new_tokens": {
"type": "integer",
"format": "int32",
Expand Down Expand Up @@ -1026,6 +1045,12 @@
"example": "null",
"nullable": true
},
"max_batch_size": {
"type": "integer",
"example": "null",
"nullable": true,
"minimum": 0
},
"max_batch_total_tokens": {
"type": "integer",
"format": "int32",
Expand Down Expand Up @@ -1119,6 +1144,11 @@
"type": "string",
"example": "My name is David and I"
},
"name": {
"type": "string",
"example": "\"David\"",
"nullable": true
},
"role": {
"type": "string",
"example": "user"
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-integration-tests"
version = "1.4.0"
version = "1.4.1"
description = "Text Generation Inference integration tests"
authors = ["Nicolas Patry <[email protected]>"]

Expand Down
2 changes: 1 addition & 1 deletion server/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ install-megablocks:
install: gen-server
pip install pip --upgrade
pip install -r requirements_cuda.txt
pip install -e ".[bnb, accelerate, quantize, peft]"
pip install -e ".[bnb, accelerate, quantize, peft, outlines]"

run-dev:
SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
Expand Down
Loading

0 comments on commit 4139054

Please sign in to comment.