Skip to content

Commit 2bef7c3

Browse files
authored
fix: Preserve Bedrock inference profile IDs in health checks (#15947)
* fix: Preserve Bedrock inference profile IDs in health checks

  - Fixes an issue where health checks were stripping inference profile IDs
  - Preserves cross-region inference profile prefixes (us., eu., apac., jp., au., us-gov., global.)
  - Strips only AWS region routing while preserving routes and handlers
  - Resolves both issue #15807 and inference profile requirement errors
  - Adds comprehensive tests for all Bedrock model format combinations

  The earlier fix for issue #15807 (regional Bedrock model health checks) was too aggressive: it stripped the cross-region inference profile prefixes that AWS requires. This caused errors such as:

  "Invocation of model ID X with on-demand throughput isn't supported. Retry your request with the ID or ARN of an inference profile."

  The fix now correctly:

  - Strips AWS regions (us-west-2, eu-central-1, etc.) from routing
  - Preserves CRIS prefixes (us., eu., etc.) required by AWS
  - Preserves routes (converse/, invoke/)
  - Preserves handlers (llama/, deepseek_r1/)
  - Only affects Bedrock models (checked via startswith)

  Test coverage includes 20+ scenarios for all Bedrock model format combinations.

* Remove unused traceback import
1 parent 8b33328 commit 2bef7c3

File tree

2 files changed

+144
-5
lines changed

2 files changed

+144
-5
lines changed

litellm/proxy/health_check.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,17 +138,50 @@ def _update_litellm_params_for_health_check(
138138
- gets a short `messages` param for health check
139139
- updates the `model` param with the `health_check_model` if it exists Doc: https://docs.litellm.ai/docs/proxy/health#wildcard-routes
140140
- updates the `voice` param with the `health_check_voice` for `audio_speech` mode if it exists Doc: https://docs.litellm.ai/docs/proxy/health#text-to-speech-models
141-
- updates the `model` param with the Bedrock base model name if it is a Bedrock model
141+
- for Bedrock models with region routing (bedrock/region/model), strips the litellm routing prefix but preserves the model ID
142142
"""
143143
litellm_params["messages"] = _get_random_llm_message()
144144
_health_check_model = model_info.get("health_check_model", None)
145145
if _health_check_model is not None:
146146
litellm_params["model"] = _health_check_model
147147
if model_info.get("mode", None) == "audio_speech":
148148
litellm_params["voice"] = model_info.get("health_check_voice", "alloy")
149-
if "bedrock" in litellm_params["model"]:
149+
150+
# Handle Bedrock region routing format: bedrock/region/model
151+
# This is needed because health checks bypass get_llm_provider() for the model param
152+
# Issue #15807: Without this, health checks send "region/model" as the model ID to AWS
153+
# which causes: "bedrock-runtime.../model/us-west-2/mistral.../invoke" (region in model ID)
154+
#
155+
# However, we must preserve cross-region inference profile prefixes like "us.", "eu.", etc.
156+
# Issue: Stripping these breaks AWS requirement for inference profile IDs
157+
#
158+
# Must also preserve route prefixes (converse/, invoke/) and handlers (llama/, deepseek_r1/, etc.)
159+
if litellm_params["model"].startswith("bedrock/"):
150160
from litellm.llms.bedrock.common_utils import BedrockModelInfo
151-
litellm_params["model"] = BedrockModelInfo.get_base_model(litellm_params["model"])
161+
162+
model = litellm_params["model"]
163+
# Strip only the bedrock/ prefix (preserve routes like converse/, invoke/)
164+
if model.startswith("bedrock/"):
165+
model = model[8:] # len("bedrock/") = 8
166+
167+
# Now check for region routing and strip it if present
168+
# Need to handle formats like:
169+
# - "us-west-2/model" → "model"
170+
# - "converse/us-west-2/model" → "converse/model"
171+
# - "llama/arn:..." → "llama/arn:..." (preserve handler)
172+
#
173+
# Strategy: Check each path segment, remove regions, preserve everything else
174+
parts = model.split("/")
175+
filtered_parts = []
176+
177+
for part in parts:
178+
# Skip AWS regions, keep everything else
179+
if part not in BedrockModelInfo.all_global_regions:
180+
filtered_parts.append(part)
181+
182+
model = "/".join(filtered_parts)
183+
litellm_params["model"] = model
184+
152185
return litellm_params
153186

154187

tests/litellm_utils_tests/test_health_check.py

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import os
55
import sys
6-
import traceback
76

87
import pytest
98
from unittest.mock import AsyncMock, patch
@@ -302,7 +301,8 @@ def test_update_litellm_params_for_health_check():
302301
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
303302
assert "voice" not in updated_params
304303

305-
# Test with Bedrock model
304+
# Test with Bedrock model with region routing - should strip bedrock/ and region/ prefix
305+
# Issue #15807: Fixes health checks sending "region/model" as model ID to AWS
306306
model_info = {}
307307
litellm_params = {
308308
"model": "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0",
@@ -311,6 +311,112 @@ def test_update_litellm_params_for_health_check():
311311
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
312312
assert updated_params["model"] == "anthropic.claude-3-7-sonnet-20250219-v1:0"
313313

314+
# Test with Bedrock cross-region inference profile - should preserve the inference profile prefix
315+
# AWS requires inference profile IDs like "us.anthropic.claude..." for cross-region routing
316+
litellm_params = {
317+
"model": "bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0",
318+
"api_key": "fake_key",
319+
}
320+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
321+
assert updated_params["model"] == "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
322+
323+
# Test with Bedrock model without region routing - should just strip bedrock/ prefix
324+
litellm_params = {
325+
"model": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
326+
"api_key": "fake_key",
327+
}
328+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
329+
assert updated_params["model"] == "anthropic.claude-3-5-sonnet-20240620-v1:0"
330+
331+
# Test that non-Bedrock models are not affected by Bedrock-specific logic
332+
litellm_params = {
333+
"model": "openai/gpt-4",
334+
"api_key": "fake_key",
335+
}
336+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
337+
assert updated_params["model"] == "openai/gpt-4" # Should remain unchanged
338+
339+
# Test ALL cross-region inference profile prefixes (CRIS)
340+
cris_prefixes = ["us.", "eu.", "apac.", "jp.", "au.", "us-gov.", "global."]
341+
for prefix in cris_prefixes:
342+
litellm_params = {
343+
"model": f"bedrock/{prefix}anthropic.claude-3-haiku-20240307-v1:0",
344+
"api_key": "fake_key",
345+
}
346+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
347+
assert updated_params["model"] == f"{prefix}anthropic.claude-3-haiku-20240307-v1:0", \
348+
f"Failed to preserve CRIS prefix: {prefix}"
349+
350+
# Test regional + CRIS combination - region should be stripped, CRIS preserved
351+
litellm_params = {
352+
"model": "bedrock/us-east-2/us.anthropic.claude-3-haiku-20240307-v1:0",
353+
"api_key": "fake_key",
354+
}
355+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
356+
assert updated_params["model"] == "us.anthropic.claude-3-haiku-20240307-v1:0"
357+
358+
# Test GovCloud regions
359+
litellm_params = {
360+
"model": "bedrock/us-gov-east-1/anthropic.claude-instant-v1",
361+
"api_key": "fake_key",
362+
}
363+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
364+
assert updated_params["model"] == "anthropic.claude-instant-v1"
365+
366+
# Test imported models with handler prefixes - handlers should be preserved
367+
litellm_params = {
368+
"model": "bedrock/llama/arn:aws:bedrock:us-east-1:123:imported-model/abc",
369+
"api_key": "fake_key",
370+
}
371+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
372+
assert updated_params["model"] == "llama/arn:aws:bedrock:us-east-1:123:imported-model/abc"
373+
374+
litellm_params = {
375+
"model": "bedrock/deepseek_r1/arn:aws:bedrock:us-west-2:456:imported-model/xyz",
376+
"api_key": "fake_key",
377+
}
378+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
379+
assert updated_params["model"] == "deepseek_r1/arn:aws:bedrock:us-west-2:456:imported-model/xyz"
380+
381+
# Test route specifications - routes should be preserved
382+
litellm_params = {
383+
"model": "bedrock/converse/us.anthropic.claude-3-5-sonnet-20240620-v1:0",
384+
"api_key": "fake_key",
385+
}
386+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
387+
assert updated_params["model"] == "converse/us.anthropic.claude-3-5-sonnet-20240620-v1:0"
388+
389+
litellm_params = {
390+
"model": "bedrock/invoke/us-west-2/anthropic.claude-instant-v1",
391+
"api_key": "fake_key",
392+
}
393+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
394+
assert updated_params["model"] == "invoke/anthropic.claude-instant-v1"
395+
396+
# Test ARN formats - should be preserved
397+
litellm_params = {
398+
"model": "bedrock/arn:aws:bedrock:eu-central-1:000:application-inference-profile/abc",
399+
"api_key": "fake_key",
400+
}
401+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
402+
assert updated_params["model"] == "arn:aws:bedrock:eu-central-1:000:application-inference-profile/abc"
403+
404+
# Test edge case: region + handler + ARN
405+
litellm_params = {
406+
"model": "bedrock/us-west-2/llama/arn:aws:bedrock:us-east-1:123:imported-model/abc",
407+
"api_key": "fake_key",
408+
}
409+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
410+
assert updated_params["model"] == "llama/arn:aws:bedrock:us-east-1:123:imported-model/abc"
411+
412+
# Test edge case: route + region + CRIS
413+
litellm_params = {
414+
"model": "bedrock/converse/us-west-2/eu.anthropic.claude-3-sonnet-20240229-v1:0",
415+
"api_key": "fake_key",
416+
}
417+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
418+
assert updated_params["model"] == "converse/eu.anthropic.claude-3-sonnet-20240229-v1:0"
419+
314420
@pytest.mark.asyncio
315421
async def test_perform_health_check_with_health_check_model():
316422
"""

0 commit comments

Comments
 (0)