47 changes: 47 additions & 0 deletions docs/my-website/docs/providers/vertex.md
@@ -1604,6 +1604,53 @@ litellm.vertex_location = "us-central1 # Your Location
| gemini-2.5-flash-preview-09-2025 | `completion('gemini-2.5-flash-preview-09-2025', messages)`, `completion('vertex_ai/gemini-2.5-flash-preview-09-2025', messages)` |
| gemini-2.5-flash-lite-preview-09-2025 | `completion('gemini-2.5-flash-lite-preview-09-2025', messages)`, `completion('vertex_ai/gemini-2.5-flash-lite-preview-09-2025', messages)` |

## Private Service Connect (PSC) Endpoints

LiteLLM supports Vertex AI models deployed to Private Service Connect (PSC) endpoints, allowing you to use custom `api_base` URLs for private deployments.

### Usage

```python
from litellm import completion

# Use PSC endpoint with custom api_base
response = completion(
    model="vertex_ai/1234567890",  # Numeric endpoint ID
    messages=[{"role": "user", "content": "Hello!"}],
    api_base="http://10.96.32.8",  # Your PSC endpoint
    vertex_project="my-project-id",
    vertex_location="us-central1"
)
```

**Key Features:**
- Supports both numeric endpoint IDs and custom model names
- Works with both completion and embedding endpoints (see the embedding sketch below)
- Automatically constructs the full PSC URL: `{api_base}/v1/projects/{project}/locations/{location}/endpoints/{model}:{endpoint}`
- Compatible with streaming requests
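
Embeddings and streaming go through the same `api_base` override. A minimal sketch, reusing the placeholder PSC address, project, and endpoint ID from the example above (swap in your own values):

```python
from litellm import completion, embedding

# Embedding against a PSC-reachable embedding model
embedding_response = embedding(
    model="vertex_ai/text-embedding-004",
    input=["Hello world"],
    api_base="http://10.96.32.8",  # Your PSC endpoint
    vertex_project="my-project-id",
    vertex_location="us-central1",
)

# Streaming completion against a numeric endpoint ID;
# LiteLLM builds the full PSC URL per the format above and appends ?alt=sse
stream = completion(
    model="vertex_ai/1234567890",
    messages=[{"role": "user", "content": "Hello!"}],
    api_base="http://10.96.32.8",
    vertex_project="my-project-id",
    vertex_location="us-central1",
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")
```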

### Configuration

Add PSC endpoints to your `config.yaml`:

```yaml
model_list:
  - model_name: psc-gemini
    litellm_params:
      model: vertex_ai/1234567890  # Numeric endpoint ID
      api_base: "http://10.96.32.8"  # Your PSC endpoint
      vertex_project: "my-project-id"
      vertex_location: "us-central1"
      vertex_credentials: "/path/to/service_account.json"
  - model_name: psc-embedding
    litellm_params:
      model: vertex_ai/text-embedding-004
      api_base: "http://10.96.32.8"  # Your PSC endpoint
      vertex_project: "my-project-id"
      vertex_location: "us-central1"
      vertex_credentials: "/path/to/service_account.json"
```
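
With the config above loaded, requests route through the LiteLLM proxy by `model_name`. A minimal sketch using the OpenAI SDK, assuming the proxy is running locally on port 4000 with `sk-1234` as its master key:

```python
import openai

# Point the OpenAI client at the LiteLLM proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# Routes to the PSC endpoint configured as "psc-gemini" above
response = client.chat.completions.create(
    model="psc-gemini",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```

The same pattern applies to the embedding deployment, e.g. `client.embeddings.create(model="psc-embedding", input=["Hello world"])`.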

## Fine-tuned Models

You can call fine-tuned Vertex AI Gemini models through LiteLLM
8 changes: 8 additions & 0 deletions litellm/llms/vertex_ai/batches/handler.py
@@ -61,6 +61,10 @@ def create_batch(
            stream=None,
            auth_header=None,
            url=default_api_base,
            model=None,
            vertex_project=vertex_project or project_id,
            vertex_location=vertex_location or "us-central1",
            vertex_api_version="v1",
        )

        headers = {
@@ -166,6 +170,10 @@ def retrieve_batch(
            stream=None,
            auth_header=None,
            url=default_api_base,
            model=None,
            vertex_project=vertex_project or project_id,
            vertex_location=vertex_location or "us-central1",
            vertex_api_version="v1",
        )

        headers = {
8 changes: 8 additions & 0 deletions litellm/llms/vertex_ai/common_utils.py
@@ -57,6 +57,9 @@ def get_vertex_ai_model_route(model: str, litellm_params: Optional[dict] = None)

>>> get_vertex_ai_model_route("openai/gpt-oss-120b")
VertexAIModelRoute.MODEL_GARDEN

>>> get_vertex_ai_model_route("1234567890", {"api_base": "http://10.96.32.8"})
VertexAIModelRoute.GEMINI # Numeric endpoints with api_base use HTTP path
"""
from litellm.llms.vertex_ai.vertex_ai_partner_models.main import (
VertexAIPartnerModels,
@@ -67,6 +70,11 @@ def get_vertex_ai_model_route(model: str, litellm_params: Optional[dict] = None)
if "gemini" in litellm_params["base_model"]:
return VertexAIModelRoute.GEMINI

# Check if numeric endpoint ID with custom api_base (PSC endpoint)
# Route to GEMINI (HTTP path) to support PSC endpoints properly
if model.isdigit() and litellm_params and litellm_params.get("api_base"):
return VertexAIModelRoute.GEMINI

# Check for partner models (llama, mistral, claude, etc.)
if VertexAIPartnerModels.is_vertex_partner_model(model=model):
return VertexAIModelRoute.PARTNER_MODELS
@@ -79,6 +79,10 @@ def _get_token_and_url_context_caching(
            stream=None,
            auth_header=auth_header,
            url=url,
            model=None,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_api_version="v1beta1" if custom_llm_provider == "vertex_ai_beta" else "v1",
        )

    def check_cache(
3 changes: 3 additions & 0 deletions litellm/llms/vertex_ai/vertex_embeddings/transformation.py
@@ -167,6 +167,9 @@ def _transform_openai_request_to_fine_tuned_embedding_request(
vertex_request["parameters"] = TextEmbeddingFineTunedParameters(
**optional_params
)
# Remove 'shared_session' from parameters if present
if vertex_request["parameters"] is not None and "shared_session" in vertex_request["parameters"]:
del vertex_request["parameters"]["shared_session"] # type: ignore[typeddict-item]

return vertex_request

48 changes: 45 additions & 3 deletions litellm/llms/vertex_ai/vertex_llm_base.py
@@ -241,6 +241,9 @@ def get_complete_vertex_url(
            auth_header=None,
            url=default_api_base,
            model=model,
            vertex_project=vertex_project or project_id,
            vertex_location=vertex_location or "us-central1",
            vertex_api_version="v1",  # Partner models typically use v1
        )
        return api_base

@@ -289,9 +292,18 @@ def _check_custom_proxy(
        auth_header: Optional[str],
        url: str,
        model: Optional[str] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_api_version: Optional[Literal["v1", "v1beta1"]] = None,
    ) -> Tuple[Optional[str], str]:
        """
        for cloudflare ai gateway - https://github.com/BerriAI/litellm/issues/4317

        Handles custom api_base for:
        1. Gemini (Google AI Studio) - constructs /models/{model}:{endpoint}
        2. Vertex AI with standard proxies - constructs {api_base}:{endpoint}
        3. Vertex AI with PSC endpoints - constructs the full path structure:
           {api_base}/v1/projects/{project}/locations/{location}/endpoints/{model}:{endpoint}

        ## Returns
        - (auth_header, url) - Tuple[Optional[str], str]
@@ -312,8 +324,34 @@
                    gemini_api_key  # cloudflare expects api key as bearer token
                )
            else:
                url = "{}:{}".format(api_base, endpoint)

                # For Vertex AI
                # Check if this is a PSC endpoint or custom deployment
                # PSC/custom endpoints need the full path structure
                if vertex_project and vertex_location and model:
                    # Check if model is numeric (endpoint ID) or if api_base doesn't contain googleapis.com
                    # These are indicators of PSC/custom endpoints
                    is_psc_or_custom = (
                        "googleapis.com" not in api_base.lower() or model.isdigit()
                    )

                    if is_psc_or_custom:
                        # Construct full PSC/custom endpoint URL
                        # Format: {api_base}/v1/projects/{project}/locations/{location}/endpoints/{model}:{endpoint}
                        version = vertex_api_version or "v1"
                        url = "{}/{}/projects/{}/locations/{}/endpoints/{}:{}".format(
                            api_base.rstrip("/"),
                            version,
                            vertex_project,
                            vertex_location,
                            model,
                            endpoint,
                        )
                    else:
                        # Standard proxy - just append endpoint
                        url = "{}:{}".format(api_base, endpoint)
                else:
                    # Fallback to simple format if we don't have all parameters
                    url = "{}:{}".format(api_base, endpoint)
            if stream is True:
                url = url + "?alt=sse"
        return auth_header, url
@@ -340,6 +378,7 @@ def _get_token_and_url(
        Returns
        token, url
        """
        version: Optional[Literal["v1beta1", "v1"]] = None
        if custom_llm_provider == "gemini":
            url, endpoint = _get_gemini_url(
                mode=mode,
@@ -355,7 +394,7 @@
            )

            ### SET RUNTIME ENDPOINT ###
            version: Literal["v1beta1", "v1"] = (
            version = (
                "v1beta1" if should_use_v1beta1_features is True else "v1"
            )
            url, endpoint = _get_vertex_url(
@@ -376,6 +415,9 @@
            stream=stream,
            url=url,
            model=model,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_api_version=version,
        )

    def _handle_reauthentication(
4 changes: 4 additions & 0 deletions litellm/llms/vertex_ai/vertex_model_garden/main.py
@@ -123,6 +123,10 @@ def completion(
                stream=stream,
                auth_header=None,
                url=default_api_base,
                model=model,
                vertex_project=vertex_project or project_id,
                vertex_location=vertex_location or "us-central1",
                vertex_api_version="v1beta1",
            )
            model = ""
            return openai_like_chat_completions.completion(