Commit

v0.9.0 (#525)
OlivierDehaene authored Jul 1, 2023
1 parent 2b53d71 commit e28a809
Showing 16 changed files with 375 additions and 257 deletions.
503 changes: 323 additions & 180 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -8,7 +8,7 @@ members = [
]

[workspace.package]
version = "0.8.2"
version = "0.9.0"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
# Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.70 AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
2 changes: 1 addition & 1 deletion README.md
@@ -84,7 +84,7 @@ model=bigscience/bloom-560m
num_shard=2
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run

-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:0.8 --model-id $model --num-shard $num_shard
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:0.9 --model-id $model --num-shard $num_shard
```
**Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
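For context, once the 0.9 container from the command above is running, a quick request against it can look like the sketch below. This is illustrative and not part of the diff: the host port comes from the `-p 8080:80` mapping above, and the `/generate` payload shape follows the TGI REST API, which may differ slightly between versions.

```shell
# Illustrative only: query the server started by the docker run command above.
# Port 8080 comes from the -p 8080:80 mapping; payload shape follows the TGI /generate API.
curl 127.0.0.1:8080/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
    -H 'Content-Type: application/json'
```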

15 changes: 0 additions & 15 deletions aml/README.md

This file was deleted.

38 changes: 0 additions & 38 deletions aml/deployment.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions aml/endpoint.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions aml/model.yaml

This file was deleted.

31 changes: 30 additions & 1 deletion docs/openapi.json
@@ -10,7 +10,7 @@
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
"version": "0.8.2"
"version": "0.9.0"
},
"paths": {
"/": {
@@ -270,6 +270,35 @@
}
}
},
"/health": {
"get": {
"tags": [
"Text Generation Inference"
],
"summary": "Health check method",
"description": "Health check method",
"operationId": "health",
"responses": {
"200": {
"description": "Everything is working fine"
},
"503": {
"description": "Text generation inference is down",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponse"
},
"example": {
"error": "unhealthy",
"error_type": "healthcheck"
}
}
}
}
}
}
},
"/info": {
"get": {
"tags": [
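The new `/health` route documented above makes liveness probes straightforward. A minimal check might look like the following; the host and port are assumptions carried over from the README's `-p 8080:80` mapping and are not part of this commit.

```shell
# Illustrative health probe against a locally running instance (port assumed from the README example).
curl -i 127.0.0.1:8080/health
# 200 -> everything is working fine
# 503 -> body like {"error":"unhealthy","error_type":"healthcheck"} (see the ErrorResponse schema)
```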
8 changes: 6 additions & 2 deletions launcher/src/main.rs
@@ -1040,14 +1040,18 @@ fn main() -> Result<(), LauncherError> {
return Ok(());
}

-let mut webserver = spawn_webserver(args, shutdown.clone(), &shutdown_receiver)?;
+let mut webserver =
+    spawn_webserver(args, shutdown.clone(), &shutdown_receiver).map_err(|err| {
+        shutdown_shards(shutdown.clone(), &shutdown_receiver);
+        err
+    })?;

// Default exit code
let mut exit_code = Ok(());

while running.load(Ordering::SeqCst) {
if let Ok(ShardStatus::Failed((rank, err))) = status_receiver.try_recv() {
tracing::error!("Shard {rank} failed to start");
tracing::error!("Shard {rank} crashed");
if let Some(err) = err {
tracing::error!("{err}");
}
10 changes: 5 additions & 5 deletions router/Cargo.toml
@@ -22,11 +22,11 @@ text-generation-client = { path = "client" }
clap = { version = "4.1.4", features = ["derive", "env"] }
flume = "0.10.14"
futures = "0.3.26"
metrics = "0.20.1"
metrics-exporter-prometheus = { version = "0.11.0", features = [] }
metrics = "0.21.0"
metrics-exporter-prometheus = { version = "0.12.1", features = [] }
nohash-hasher = "0.2.0"
opentelemetry = { version = "0.18.0", features = ["rt-tokio"] }
opentelemetry-otlp = "0.11.0"
opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }
opentelemetry-otlp = "0.12.0"
rand = "0.8.5"
reqwest = { version = "0.11.14", features = [] }
serde = "1.0.152"
@@ -36,7 +36,7 @@ tokenizers = "0.13.3"
tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
tower-http = { version = "0.4.0", features = ["cors"] }
tracing = "0.1.37"
tracing-opentelemetry = "0.18.0"
tracing-opentelemetry = "0.19.0"
tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
utoipa = { version = "3.0.1", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "3.0.2", features = ["axum"] }
4 changes: 2 additions & 2 deletions router/client/Cargo.toml
@@ -11,10 +11,10 @@ grpc-metadata = { path = "../grpc-metadata" }
prost = "^0.11"
thiserror = "^1.0"
tokio = { version = "^1.25", features = ["sync"] }
tonic = "^0.8"
tonic = "^0.9"
tower = "^0.4"
tracing = "^0.1"

[build-dependencies]
tonic-build = "0.8.4"
tonic-build = "0.9.2"
prost-build = "0.11.6"
6 changes: 3 additions & 3 deletions router/grpc-metadata/Cargo.toml
@@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"

[dependencies]
opentelemetry = "0.18.0"
tonic = "^0.8"
opentelemetry = "^0.19"
tonic = "^0.9"
tracing = "^0.1"
tracing-opentelemetry = "0.18.0"
tracing-opentelemetry = "^0.19"
1 change: 1 addition & 0 deletions router/src/server.rs
@@ -532,6 +532,7 @@ pub async fn run(
#[derive(OpenApi)]
#[openapi(
paths(
+health,
get_model_info,
compat_generate,
generate,
2 changes: 1 addition & 1 deletion rust-toolchain.toml
@@ -1,3 +1,3 @@
[toolchain]
channel = "1.69.0"
channel = "1.70.0"
components = ["rustfmt", "clippy"]
2 changes: 1 addition & 1 deletion server/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-server"
version = "0.8.2"
version = "0.9.0"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <[email protected]>"]

