Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 55 additions & 16 deletions litellm/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ def __init__( # noqa: PLR0915

# Initialize model ID to deployment index mapping for O(1) lookups
self.model_id_to_deployment_index_map: Dict[str, int] = {}
# Initialize model name to deployment indices mapping for O(1) lookups
# Maps model_name -> list of indices in model_list
self.model_name_to_deployment_indices: Dict[str, List[int]] = {}

if model_list is not None:
# Build model index immediately to enable O(1) lookups from the start
Expand Down Expand Up @@ -5097,6 +5100,7 @@ def set_model_list(self, model_list: list):
original_model_list = copy.deepcopy(model_list)
self.model_list = []
self.model_id_to_deployment_index_map = {} # Reset the index
self.model_name_to_deployment_indices = {} # Reset the model_name index
# we add api_base/api_key to each model so that load balancing between azure/gpt on api_base1 and api_base2 works

for model in original_model_list:
Expand Down Expand Up @@ -5138,6 +5142,9 @@ def set_model_list(self, model_list: list):
f"\nInitialized Model List {self.get_model_names()}"
)
self.model_names = [m["model_name"] for m in model_list]

# Build model_name index for O(1) lookups
self._build_model_name_index(self.model_list)

def _add_deployment(self, deployment: Deployment) -> Deployment:
import os
Expand Down Expand Up @@ -5365,20 +5372,27 @@ def _add_model_to_list_and_index_map(
self, model: dict, model_id: Optional[str] = None
) -> None:
"""
Helper method to add a model to the model_list and update the model_id_to_deployment_index_map.
Helper method to add a model to the model_list and update both indices.

Parameters:
- model: dict - the model to add to the list
- model_id: Optional[str] - the model ID to use for indexing. If None, will try to get from model["model_info"]["id"]
"""
idx = len(self.model_list)
self.model_list.append(model)
# Update model index for O(1) lookup

# Update model_id index for O(1) lookup
if model_id is not None:
self.model_id_to_deployment_index_map[model_id] = len(self.model_list) - 1
self.model_id_to_deployment_index_map[model_id] = idx
elif model.get("model_info", {}).get("id") is not None:
self.model_id_to_deployment_index_map[model["model_info"]["id"]] = (
len(self.model_list) - 1
)
self.model_id_to_deployment_index_map[model["model_info"]["id"]] = idx

# Update model_name index for O(1) lookup
model_name = model.get("model_name")
if model_name:
if model_name not in self.model_name_to_deployment_indices:
self.model_name_to_deployment_indices[model_name] = []
self.model_name_to_deployment_indices[model_name].append(idx)

def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
"""
Expand Down Expand Up @@ -6094,6 +6108,22 @@ async def set_response_headers(
additional_headers[header] = value
return response

def _build_model_name_index(self, model_list: list) -> None:
    """
    Rebuild the model_name -> deployment indices mapping from scratch.

    The existing mapping is emptied in place, then every entry of
    ``model_list`` that has a truthy ``"model_name"`` contributes its
    position in ``model_list`` to the list stored under that name. Entries
    sharing a model_name are recorded in list order.

    This lets callers fetch the positions of all deployments for a
    model_name with a single dict lookup instead of a linear scan through
    the entire model_list.

    Parameters:
    - model_list: list - the deployments to index; each entry must support
      ``.get("model_name")``
    """
    # Empty the existing dict rather than rebinding the attribute, so the
    # same dict object keeps being used across rebuilds.
    name_index = self.model_name_to_deployment_indices
    name_index.clear()

    for idx, model in enumerate(model_list):
        model_name = model.get("model_name")
        if not model_name:
            # A missing or empty model_name is left out of the index.
            continue
        name_index.setdefault(model_name, []).append(idx)

def _build_model_id_to_deployment_index_map(self, model_list: list):
"""
Build model index from model list to enable O(1) lookups immediately.
Expand Down Expand Up @@ -6198,18 +6228,27 @@ def _get_all_deployments(
Used for accurate 'get_model_list'.

if team_id specified, only return team-specific models

Optimized with O(1) index lookup instead of O(n) linear scan.
"""
returned_models: List[DeploymentTypedDict] = []
for model in self.model_list:
if self.should_include_deployment(
model_name=model_name, model=model, team_id=team_id
):
if model_alias is not None:
alias_model = copy.deepcopy(model)
alias_model["model_name"] = model_alias
returned_models.append(alias_model)
else:
returned_models.append(model)

# O(1) lookup in model_name index
if model_name in self.model_name_to_deployment_indices:
indices = self.model_name_to_deployment_indices[model_name]

# O(k) where k = deployments for this model_name (typically 1-10)
for idx in indices:
model = self.model_list[idx]
if self.should_include_deployment(
model_name=model_name, model=model, team_id=team_id
):
if model_alias is not None:
alias_model = copy.deepcopy(model)
alias_model["model_name"] = model_alias
returned_models.append(alias_model)
else:
returned_models.append(model)

return returned_models

Expand Down
52 changes: 51 additions & 1 deletion tests/router_unit_tests/test_router_index_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def test_add_model_to_list_and_index_map_from_model_info(self, router):
# Verify: Index map uses model_info.id
assert router.model_id_to_deployment_index_map["model-info-id"] == 0


def test_add_model_to_list_and_index_map_multiple_models(self, router):
"""Test _add_model_to_list_and_index_map with multiple models to verify indexing"""
# Setup: Empty router
Expand Down Expand Up @@ -127,3 +126,54 @@ def test_has_model_id(self, router):
# Test: Empty router
empty_router = Router(model_list=[])
assert empty_router.has_model_id("any-id") == False

def test_build_model_name_index(self, router):
    """Check that _build_model_name_index groups list positions by model_name and resets on rebuild"""

    def make_deployment(model_name, model_id):
        # Every deployment in this test uses its model_name as the litellm model too.
        return {
            "model_name": model_name,
            "litellm_params": {"model": model_name},
            "model_info": {"id": model_id},
        }

    # Setup: three deployments, two of which share a model_name
    initial_deployments = [
        make_deployment("gpt-3.5-turbo", "model-1"),
        make_deployment("gpt-4", "model-2"),
        make_deployment("gpt-4", "model-3"),  # Duplicate model_name, different deployment
    ]

    # Test: Build index from model list
    router._build_model_name_index(initial_deployments)
    name_index = router.model_name_to_deployment_indices

    # Verify: both model names are present in the index
    assert "gpt-3.5-turbo" in name_index
    assert "gpt-4" in name_index

    # Verify: a unique model_name maps to its single position
    assert name_index["gpt-3.5-turbo"] == [0]

    # Verify: a shared model_name maps to every matching position, in order
    assert name_index["gpt-4"] == [1, 2]

    # Test: Rebuild index with a different list (should clear and rebuild)
    router._build_model_name_index([make_deployment("claude-3", "model-4")])
    name_index = router.model_name_to_deployment_indices

    # Verify: entries from the first build are gone
    assert "gpt-3.5-turbo" not in name_index
    assert "gpt-4" not in name_index

    # Verify: only the new deployment is indexed, at position 0
    assert "claude-3" in name_index
    assert name_index["claude-3"] == [0]
Loading