Skip to content

Commit 4e5c32f

Browse files
committed
fix: Preserve Bedrock inference profile IDs in health checks
- Fixes an issue where health checks were stripping inference profile IDs - Preserves cross-region inference profile prefixes (us., eu., apac., jp., au., us-gov., global.) - Strips only AWS region routing while preserving routes and handlers - Resolves both issue #15807 and the inference profile requirement errors - Adds comprehensive tests for all Bedrock model format combinations. The previous fix for issue #15807 attempted to correct regional Bedrock model health checks but was too aggressive, stripping cross-region inference profile prefixes that AWS requires. This caused errors such as: "Invocation of model ID X with on-demand throughput isn't supported. Retry your request with the ID or ARN of an inference profile." The fix now correctly: - Strips AWS regions (us-west-2, eu-central-1, etc.) from routing - Preserves cross-region inference profile (CRIS) prefixes (us., eu., etc.) required by AWS - Preserves routes (converse/, invoke/) - Preserves handlers (llama/, deepseek_r1/) - Only affects Bedrock models (checked via startswith). Test coverage includes 20+ scenarios for all Bedrock model format combinations.
1 parent c0890e7 commit 4e5c32f

File tree

2 files changed

+144
-4
lines changed

2 files changed

+144
-4
lines changed

litellm/proxy/health_check.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,17 +138,50 @@ def _update_litellm_params_for_health_check(
138138
- gets a short `messages` param for health check
139139
- updates the `model` param with the `health_check_model` if it exists Doc: https://docs.litellm.ai/docs/proxy/health#wildcard-routes
140140
- updates the `voice` param with the `health_check_voice` for `audio_speech` mode if it exists Doc: https://docs.litellm.ai/docs/proxy/health#text-to-speech-models
141-
- updates the `model` param with the Bedrock base model name if it is a Bedrock model
141+
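- for Bedrock models (`bedrock/...`), strips the litellm `bedrock/` prefix and any AWS region path segment (e.g. bedrock/region/model), while preserving routes, handlers, and the model ID itself (including cross-region inference profile prefixes such as `us.`)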
142142
"""
143143
litellm_params["messages"] = _get_random_llm_message()
144144
_health_check_model = model_info.get("health_check_model", None)
145145
if _health_check_model is not None:
146146
litellm_params["model"] = _health_check_model
147147
if model_info.get("mode", None) == "audio_speech":
148148
litellm_params["voice"] = model_info.get("health_check_voice", "alloy")
149-
if "bedrock" in litellm_params["model"]:
149+
150+
# Handle Bedrock region routing format: bedrock/region/model
151+
# This is needed because health checks bypass get_llm_provider() for the model param
152+
# Issue #15807: Without this, health checks send "region/model" as the model ID to AWS
153+
# which causes: "bedrock-runtime.../model/us-west-2/mistral.../invoke" (region in model ID)
154+
#
155+
# However, we must preserve cross-region inference profile prefixes like "us.", "eu.", etc.
156+
# Stripping these prefixes breaks AWS's requirement that such models be invoked with an inference profile ID
157+
#
158+
# Must also preserve route prefixes (converse/, invoke/) and handlers (llama/, deepseek_r1/, etc.)
159+
if litellm_params["model"].startswith("bedrock/"):
150160
from litellm.llms.bedrock.common_utils import BedrockModelInfo
151-
litellm_params["model"] = BedrockModelInfo.get_base_model(litellm_params["model"])
161+
162+
model = litellm_params["model"]
163+
# Strip only the bedrock/ prefix (preserve routes like converse/, invoke/)
164+
if model.startswith("bedrock/"):
165+
model = model[8:] # len("bedrock/") = 8
166+
167+
# Now check for region routing and strip it if present
168+
# Need to handle formats like:
169+
# - "us-west-2/model" → "model"
170+
# - "converse/us-west-2/model" → "converse/model"
171+
# - "llama/arn:..." → "llama/arn:..." (preserve handler)
172+
#
173+
# Strategy: Check each path segment, remove regions, preserve everything else
174+
parts = model.split("/")
175+
filtered_parts = []
176+
177+
for part in parts:
178+
# Skip AWS regions, keep everything else
179+
if part not in BedrockModelInfo.all_global_regions:
180+
filtered_parts.append(part)
181+
182+
model = "/".join(filtered_parts)
183+
litellm_params["model"] = model
184+
152185
return litellm_params
153186

154187

tests/litellm_utils_tests/test_health_check.py

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,8 @@ def test_update_litellm_params_for_health_check():
302302
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
303303
assert "voice" not in updated_params
304304

305-
# Test with Bedrock model
305+
# Test a Bedrock model with region routing - both the bedrock/ prefix and the region/ segment should be stripped
306+
# Issue #15807: Fixes health checks sending "region/model" as model ID to AWS
306307
model_info = {}
307308
litellm_params = {
308309
"model": "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0",
@@ -311,6 +312,112 @@ def test_update_litellm_params_for_health_check():
311312
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
312313
assert updated_params["model"] == "anthropic.claude-3-7-sonnet-20250219-v1:0"
313314

315+
# Test with Bedrock cross-region inference profile - should preserve the inference profile prefix
316+
# AWS requires inference profile IDs like "us.anthropic.claude..." for cross-region routing
317+
litellm_params = {
318+
"model": "bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0",
319+
"api_key": "fake_key",
320+
}
321+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
322+
assert updated_params["model"] == "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
323+
324+
# Test with Bedrock model without region routing - should just strip bedrock/ prefix
325+
litellm_params = {
326+
"model": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
327+
"api_key": "fake_key",
328+
}
329+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
330+
assert updated_params["model"] == "anthropic.claude-3-5-sonnet-20240620-v1:0"
331+
332+
# Test that non-Bedrock models are not affected by Bedrock-specific logic
333+
litellm_params = {
334+
"model": "openai/gpt-4",
335+
"api_key": "fake_key",
336+
}
337+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
338+
assert updated_params["model"] == "openai/gpt-4" # Should remain unchanged
339+
340+
# Test ALL cross-region inference profile prefixes (CRIS)
341+
cris_prefixes = ["us.", "eu.", "apac.", "jp.", "au.", "us-gov.", "global."]
342+
for prefix in cris_prefixes:
343+
litellm_params = {
344+
"model": f"bedrock/{prefix}anthropic.claude-3-haiku-20240307-v1:0",
345+
"api_key": "fake_key",
346+
}
347+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
348+
assert updated_params["model"] == f"{prefix}anthropic.claude-3-haiku-20240307-v1:0", \
349+
f"Failed to preserve CRIS prefix: {prefix}"
350+
351+
# Test regional + CRIS combination - region should be stripped, CRIS preserved
352+
litellm_params = {
353+
"model": "bedrock/us-east-2/us.anthropic.claude-3-haiku-20240307-v1:0",
354+
"api_key": "fake_key",
355+
}
356+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
357+
assert updated_params["model"] == "us.anthropic.claude-3-haiku-20240307-v1:0"
358+
359+
# Test GovCloud regions
360+
litellm_params = {
361+
"model": "bedrock/us-gov-east-1/anthropic.claude-instant-v1",
362+
"api_key": "fake_key",
363+
}
364+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
365+
assert updated_params["model"] == "anthropic.claude-instant-v1"
366+
367+
# Test imported models with handler prefixes - handlers should be preserved
368+
litellm_params = {
369+
"model": "bedrock/llama/arn:aws:bedrock:us-east-1:123:imported-model/abc",
370+
"api_key": "fake_key",
371+
}
372+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
373+
assert updated_params["model"] == "llama/arn:aws:bedrock:us-east-1:123:imported-model/abc"
374+
375+
litellm_params = {
376+
"model": "bedrock/deepseek_r1/arn:aws:bedrock:us-west-2:456:imported-model/xyz",
377+
"api_key": "fake_key",
378+
}
379+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
380+
assert updated_params["model"] == "deepseek_r1/arn:aws:bedrock:us-west-2:456:imported-model/xyz"
381+
382+
# Test route specifications - routes should be preserved
383+
litellm_params = {
384+
"model": "bedrock/converse/us.anthropic.claude-3-5-sonnet-20240620-v1:0",
385+
"api_key": "fake_key",
386+
}
387+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
388+
assert updated_params["model"] == "converse/us.anthropic.claude-3-5-sonnet-20240620-v1:0"
389+
390+
litellm_params = {
391+
"model": "bedrock/invoke/us-west-2/anthropic.claude-instant-v1",
392+
"api_key": "fake_key",
393+
}
394+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
395+
assert updated_params["model"] == "invoke/anthropic.claude-instant-v1"
396+
397+
# Test ARN formats - should be preserved
398+
litellm_params = {
399+
"model": "bedrock/arn:aws:bedrock:eu-central-1:000:application-inference-profile/abc",
400+
"api_key": "fake_key",
401+
}
402+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
403+
assert updated_params["model"] == "arn:aws:bedrock:eu-central-1:000:application-inference-profile/abc"
404+
405+
# Test edge case: region + handler + ARN
406+
litellm_params = {
407+
"model": "bedrock/us-west-2/llama/arn:aws:bedrock:us-east-1:123:imported-model/abc",
408+
"api_key": "fake_key",
409+
}
410+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
411+
assert updated_params["model"] == "llama/arn:aws:bedrock:us-east-1:123:imported-model/abc"
412+
413+
# Test edge case: route + region + CRIS
414+
litellm_params = {
415+
"model": "bedrock/converse/us-west-2/eu.anthropic.claude-3-sonnet-20240229-v1:0",
416+
"api_key": "fake_key",
417+
}
418+
updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
419+
assert updated_params["model"] == "converse/eu.anthropic.claude-3-sonnet-20240229-v1:0"
420+
314421
@pytest.mark.asyncio
315422
async def test_perform_health_check_with_health_check_model():
316423
"""

0 commit comments

Comments
 (0)