Skip to content

Commit 02715dc

Browse files
mht-sharma and Narsil authored
Add option to configure prometheus port (#3187)
* add prometheus port
* fix doc
* add port for trtllm and llamacpp
* Fixing format after rebase.

---------

Co-authored-by: Nicolas Patry <[email protected]>
1 parent 8f88197 commit 02715dc

File tree

7 files changed: +38 -0 lines changed

backends/llamacpp/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,9 @@ struct Args {
119119
#[clap(default_value = "3000", long, short, env)]
120120
port: u16,
121121

122+
#[clap(default_value = "9000", long, short, env)]
123+
prometheus_port: u16,
124+
122125
/// Enable JSON output format.
123126
#[clap(long, env)]
124127
json_output: bool,
@@ -317,6 +320,7 @@ async fn main() -> Result<(), RouterError> {
317320
args.max_client_batch_size,
318321
args.usage_stats,
319322
args.payload_limit,
323+
args.prometheus_port,
320324
)
321325
.await?;
322326
Ok(())

backends/trtllm/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ struct Args {
3737
hostname: String,
3838
#[clap(default_value = "3000", long, short, env)]
3939
port: u16,
40+
#[clap(default_value = "9000", long, short, env)]
41+
prometheus_port: u16,
4042
#[clap(long, env, required = true)]
4143
tokenizer_name: String,
4244
#[clap(long, env)]
@@ -227,6 +229,7 @@ async fn main() -> Result<(), TensorRtLlmBackendError> {
227229
max_batch_total_tokens,
228230
hostname,
229231
port,
232+
prometheus_port,
230233
tokenizer_name,
231234
tokenizer_config_path,
232235
revision,
@@ -322,6 +325,7 @@ async fn main() -> Result<(), TensorRtLlmBackendError> {
322325
max_client_batch_size,
323326
usage_stats,
324327
payload_limit,
328+
prometheus_port,
325329
)
326330
.await?;
327331
Ok(())

backends/v2/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@ struct Args {
3636
hostname: String,
3737
#[clap(default_value = "3000", long, short, env)]
3838
port: u16,
39+
#[clap(default_value = "9000", long, short, env)]
40+
prometheus_port: u16,
3941
#[clap(default_value = "/tmp/text-generation-server-0", long, env)]
4042
master_shard_uds_path: String,
4143
#[clap(default_value = "bigscience/bloom", long, env)]
@@ -99,6 +101,7 @@ async fn main() -> Result<(), RouterError> {
99101
max_batch_size,
100102
hostname,
101103
port,
104+
prometheus_port,
102105
master_shard_uds_path,
103106
tokenizer_name,
104107
tokenizer_config_path,
@@ -198,6 +201,7 @@ async fn main() -> Result<(), RouterError> {
198201
max_client_batch_size,
199202
usage_stats,
200203
payload_limit,
204+
prometheus_port,
201205
)
202206
.await?;
203207
Ok(())

backends/v3/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@ struct Args {
3636
hostname: String,
3737
#[clap(default_value = "3000", long, short, env)]
3838
port: u16,
39+
#[clap(default_value = "9000", long, short, env)]
40+
prometheus_port: u16,
3941
#[clap(default_value = "/tmp/text-generation-server-0", long, env)]
4042
master_shard_uds_path: String,
4143
#[clap(default_value = "bigscience/bloom", long, env)]
@@ -99,6 +101,7 @@ async fn main() -> Result<(), RouterError> {
99101
max_batch_size,
100102
hostname,
101103
port,
104+
prometheus_port,
102105
master_shard_uds_path,
103106
tokenizer_name,
104107
tokenizer_config_path,
@@ -214,6 +217,7 @@ async fn main() -> Result<(), RouterError> {
214217
max_client_batch_size,
215218
usage_stats,
216219
payload_limit,
220+
prometheus_port,
217221
)
218222
.await?;
219223
Ok(())

docs/source/reference/launcher.md

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,15 @@ Options:
251251
[env: PORT=]
252252
[default: 3000]
253253

254+
```
255+
## PROMETHEUS_PORT
256+
```shell
257+
-p, --prometheus-port <PROMETHEUS_PORT>
258+
The Prometheus port to listen on
259+
260+
[env: PROMETHEUS_PORT=]
261+
[default: 9000]
262+
254263
```
255264
## SHARD_UDS_PATH
256265
```shell

launcher/src/main.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -773,6 +773,10 @@ struct Args {
773773
#[clap(default_value = "3000", long, short, env)]
774774
port: u16,
775775

776+
/// The Prometheus port to listen on.
777+
#[clap(default_value = "9000", long, short, env)]
778+
prometheus_port: u16,
779+
776780
/// The name of the socket for gRPC communication between the webserver
777781
/// and the shards.
778782
#[clap(default_value = "/tmp/text-generation-server", long, env)]
@@ -1848,6 +1852,8 @@ fn spawn_webserver(
18481852
args.hostname.to_string(),
18491853
"--port".to_string(),
18501854
args.port.to_string(),
1855+
"--prometheus-port".to_string(),
1856+
args.prometheus_port.to_string(),
18511857
"--master-shard-uds-path".to_string(),
18521858
format!("{}-0", args.shard_uds_path),
18531859
"--tokenizer-name".to_string(),

router/src/server.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1522,6 +1522,7 @@ pub async fn run(
15221522
max_client_batch_size: usize,
15231523
usage_stats_level: usage_stats::UsageStatsLevel,
15241524
payload_limit: usize,
1525+
prometheus_port: u16,
15251526
) -> Result<(), WebServerError> {
15261527
// CORS allowed origins
15271528
// map to go inside the option and then map to parse from String to HeaderValue
@@ -1825,6 +1826,7 @@ pub async fn run(
18251826
compat_return_full_text,
18261827
allow_origin,
18271828
payload_limit,
1829+
prometheus_port,
18281830
)
18291831
.await;
18301832

@@ -1886,6 +1888,7 @@ async fn start(
18861888
compat_return_full_text: bool,
18871889
allow_origin: Option<AllowOrigin>,
18881890
payload_limit: usize,
1891+
prometheus_port: u16,
18891892
) -> Result<(), WebServerError> {
18901893
// Determine the server port based on the feature and environment variable.
18911894
let port = if cfg!(feature = "google") {
@@ -1959,8 +1962,12 @@ async fn start(
19591962
// let skipped_matcher = Matcher::Full(String::from("tgi_request_skipped_tokens"));
19601963
// let skipped_buckets: Vec<f64> = (0..shard_info.speculate + 1).map(|x| x as f64).collect();
19611964

1965+
let mut p_addr = addr;
1966+
p_addr.set_port(prometheus_port);
1967+
19621968
// Prometheus handler
19631969
let builder = PrometheusBuilder::new()
1970+
.with_http_listener(p_addr)
19641971
.set_buckets_for_metric(duration_matcher, &duration_buckets)
19651972
.unwrap()
19661973
.set_buckets_for_metric(input_length_matcher, &input_length_buckets)

0 commit comments

Comments
 (0)