fix: Preserve Bedrock inference profile IDs in health checks

ylgibby · ylgibby · commit 4e5c32f8db02 · 2025-10-26T15:32:18.000-06:00
- Fixes an issue where health checks were stripping inference profile IDs
- Preserves cross-region inference profile prefixes (us., eu., apac., jp., au., us-gov., global.)
- Strips only AWS region routing while preserving routes and handlers
- Resolves both issue #15807 and inference profile requirement errors
- Adds comprehensive tests for all Bedrock model format combinations

The fix attempted for issue #15807 (regional Bedrock model health checks) was too aggressive: it also stripped the cross-region inference profile prefixes that AWS requires. This caused errors such as:

"Invocation of model ID X with on-demand throughput isn't supported. Retry your request with the ID or ARN of an inference profile."

The fix now correctly:
- Strips AWS regions (us-west-2, eu-central-1, etc.) from routing
- Preserves CRIS prefixes (us., eu., etc.) required by AWS
- Preserves routes (converse/, invoke/)
- Preserves handlers (llama/, deepseek_r1/)
- Only affects Bedrock models (checked via startswith)

Test coverage includes 20+ scenarios for all Bedrock model format combinations.
diff --git a/litellm/proxy/health_check.py b/litellm/proxy/health_check.py
@@ -138,17 +138,50 @@ def _update_litellm_params_for_health_check(
     - gets a short `messages` param for health check
     - updates the `model` param with the `health_check_model` if it exists Doc: https://docs.litellm.ai/docs/proxy/health#wildcard-routes
     - updates the `voice` param with the `health_check_voice` for `audio_speech` mode if it exists Doc: https://docs.litellm.ai/docs/proxy/health#text-to-speech-models
-    - updates the `model` param with the Bedrock base model name if it is a Bedrock model
+    - for Bedrock models with region routing (bedrock/region/model), strips the litellm routing prefix but preserves the model ID
     """
     litellm_params["messages"] = _get_random_llm_message()
     _health_check_model = model_info.get("health_check_model", None)
     if _health_check_model is not None:
         litellm_params["model"] = _health_check_model
     if model_info.get("mode", None) == "audio_speech":
         litellm_params["voice"] = model_info.get("health_check_voice", "alloy")
-    if "bedrock" in litellm_params["model"]:
+
+    # Handle Bedrock region routing format: bedrock/region/model
+    # This is needed because health checks bypass get_llm_provider() for the model param
+    # Issue #15807: Without this, health checks send "region/model" as the model ID to AWS
+    # which causes: "bedrock-runtime.../model/us-west-2/mistral.../invoke" (region in model ID)
+    #
+    # However, we must preserve cross-region inference profile prefixes like "us.", "eu.", etc.
+    # Issue: Stripping these breaks AWS requirement for inference profile IDs
+    #
+    # Must also preserve route prefixes (converse/, invoke/) and handlers (llama/, deepseek_r1/, etc.)
+    if litellm_params["model"].startswith("bedrock/"):
         from litellm.llms.bedrock.common_utils import BedrockModelInfo
-        litellm_params["model"] = BedrockModelInfo.get_base_model(litellm_params["model"])
+
+        model = litellm_params["model"]
+        # Strip only the bedrock/ prefix (preserve routes like converse/, invoke/)
+        if model.startswith("bedrock/"):
+            model = model[8:]  # len("bedrock/") = 8
+
+        # Now check for region routing and strip it if present
+        # Need to handle formats like:
+        # - "us-west-2/model" → "model"
+        # - "converse/us-west-2/model" → "converse/model"
+        # - "llama/arn:..." → "llama/arn:..." (preserve handler)
+        #
+        # Strategy: Check each path segment, remove regions, preserve everything else
+        parts = model.split("/")
+        filtered_parts = []
+
+        for part in parts:
+            # Skip AWS regions, keep everything else
+            if part not in BedrockModelInfo.all_global_regions:
+                filtered_parts.append(part)
+
+        model = "/".join(filtered_parts)
+        litellm_params["model"] = model
+
     return litellm_params
 
 
diff --git a/tests/litellm_utils_tests/test_health_check.py b/tests/litellm_utils_tests/test_health_check.py
@@ -302,7 +302,8 @@ def test_update_litellm_params_for_health_check():
     updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
     assert "voice" not in updated_params
 
-    # Test with Bedrock model
+    # Test with Bedrock model with region routing - should strip bedrock/ and region/ prefix
+    # Issue #15807: Fixes health checks sending "region/model" as model ID to AWS
     model_info = {}
     litellm_params = {
         "model": "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0",
@@ -311,6 +312,112 @@ def test_update_litellm_params_for_health_check():
     updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
     assert updated_params["model"] == "anthropic.claude-3-7-sonnet-20250219-v1:0"
 
+    # Test with Bedrock cross-region inference profile - should preserve the inference profile prefix
+    # AWS requires inference profile IDs like "us.anthropic.claude..." for cross-region routing
+    litellm_params = {
+        "model": "bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
+
+    # Test with Bedrock model without region routing - should just strip bedrock/ prefix
+    litellm_params = {
+        "model": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "anthropic.claude-3-5-sonnet-20240620-v1:0"
+
+    # Test that non-Bedrock models are not affected by Bedrock-specific logic
+    litellm_params = {
+        "model": "openai/gpt-4",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "openai/gpt-4"  # Should remain unchanged
+
+    # Test ALL cross-region inference profile prefixes (CRIS)
+    cris_prefixes = ["us.", "eu.", "apac.", "jp.", "au.", "us-gov.", "global."]
+    for prefix in cris_prefixes:
+        litellm_params = {
+            "model": f"bedrock/{prefix}anthropic.claude-3-haiku-20240307-v1:0",
+            "api_key": "fake_key",
+        }
+        updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+        assert updated_params["model"] == f"{prefix}anthropic.claude-3-haiku-20240307-v1:0", \
+            f"Failed to preserve CRIS prefix: {prefix}"
+
+    # Test regional + CRIS combination - region should be stripped, CRIS preserved
+    litellm_params = {
+        "model": "bedrock/us-east-2/us.anthropic.claude-3-haiku-20240307-v1:0",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "us.anthropic.claude-3-haiku-20240307-v1:0"
+
+    # Test GovCloud regions
+    litellm_params = {
+        "model": "bedrock/us-gov-east-1/anthropic.claude-instant-v1",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "anthropic.claude-instant-v1"
+
+    # Test imported models with handler prefixes - handlers should be preserved
+    litellm_params = {
+        "model": "bedrock/llama/arn:aws:bedrock:us-east-1:123:imported-model/abc",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "llama/arn:aws:bedrock:us-east-1:123:imported-model/abc"
+
+    litellm_params = {
+        "model": "bedrock/deepseek_r1/arn:aws:bedrock:us-west-2:456:imported-model/xyz",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "deepseek_r1/arn:aws:bedrock:us-west-2:456:imported-model/xyz"
+
+    # Test route specifications - routes should be preserved
+    litellm_params = {
+        "model": "bedrock/converse/us.anthropic.claude-3-5-sonnet-20240620-v1:0",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "converse/us.anthropic.claude-3-5-sonnet-20240620-v1:0"
+
+    litellm_params = {
+        "model": "bedrock/invoke/us-west-2/anthropic.claude-instant-v1",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "invoke/anthropic.claude-instant-v1"
+
+    # Test ARN formats - should be preserved
+    litellm_params = {
+        "model": "bedrock/arn:aws:bedrock:eu-central-1:000:application-inference-profile/abc",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "arn:aws:bedrock:eu-central-1:000:application-inference-profile/abc"
+
+    # Test edge case: region + handler + ARN
+    litellm_params = {
+        "model": "bedrock/us-west-2/llama/arn:aws:bedrock:us-east-1:123:imported-model/abc",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "llama/arn:aws:bedrock:us-east-1:123:imported-model/abc"
+
+    # Test edge case: route + region + CRIS
+    litellm_params = {
+        "model": "bedrock/converse/us-west-2/eu.anthropic.claude-3-sonnet-20240229-v1:0",
+        "api_key": "fake_key",
+    }
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+    assert updated_params["model"] == "converse/eu.anthropic.claude-3-sonnet-20240229-v1:0"
+
 @pytest.mark.asyncio
 async def test_perform_health_check_with_health_check_model():
     """