-
Notifications
You must be signed in to change notification settings - Fork 1.5k
[aks-agent] Add Microsoft Entra ID authentication support for Azure OpenAI #9719
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -232,7 +232,7 @@ def _populate_api_keys_from_secret(self): | |
| ) | ||
|
|
||
| if not secret.data: | ||
| logger.warning("Secret '%s' exists but has no data", self.llm_secret_name) | ||
| logger.debug("Secret '%s' exists but has no data", self.llm_secret_name) | ||
| return | ||
|
|
||
| # Decode secret data (base64 encoded) | ||
|
|
@@ -927,6 +927,22 @@ def _create_helm_values(self): | |
| "create": False, | ||
| } | ||
|
|
||
| # Configure aks-agent pod to use the same service account as aks-mcp for workload identity | ||
| helm_values["workloadIdentity"] = { | ||
| "enabled": True, | ||
| } | ||
| helm_values["serviceAccount"] = { | ||
| "create": False, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. workload identity is enabled unconditionally for all deployments These Helm values ( This could cause deployment failures or unexpected behavior on clusters without workload identity. Consider gating this behind the has_empty_api_key = any(
not (model_config.get("api_key") or "").strip()
for model_config in self.llm_config_manager.model_list.values()
)
if has_empty_api_key:
helm_values["workloadIdentity"] = {"enabled": True}
helm_values["serviceAccount"] = {
"create": False,
"name": self.aks_mcp_service_account_name,
}
helm_values["azureADTokenAuth"] = True
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's also by design. It's ok to not have workload identity. |
||
| "name": self.aks_mcp_service_account_name, | ||
| } | ||
|
|
||
| has_empty_api_key = any( | ||
| not model_config.get("api_key") or not model_config.get("api_key").strip() | ||
| for model_config in self.llm_config_manager.model_list.values() | ||
mainred marked this conversation as resolved.
Show resolved
Hide resolved
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: the empty api_key condition can be simplified. The has_empty_api_key = any(
not (model_config.get("api_key") or "").strip()
for model_config in self.llm_config_manager.model_list.values()
)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. each llm provider with check the emptiness of the apikey |
||
| ) | ||
| if has_empty_api_key: | ||
| helm_values["azureADTokenAuth"] = True | ||
mainred marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return helm_values | ||
|
|
||
| def save_llm_config(self, provider: LLMProvider, params: dict) -> None: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ | |
|
|
||
| from .anthropic_provider import AnthropicProvider | ||
| from .azure_provider import AzureProvider | ||
| from .azure_entraid_provider import AzureEntraIDProvider | ||
| from .base import LLMProvider | ||
| from .gemini_provider import GeminiProvider | ||
| from .openai_compatible_provider import OpenAICompatibleProvider | ||
|
|
@@ -19,11 +20,11 @@ | |
|
|
||
| _PROVIDER_CLASSES: List[LLMProvider] = [ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potential bug: Both This could cause issues when looking up a provider by name. Consider giving
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's by design. We want two kinds of azure openai. |
||
| AzureProvider, | ||
| AzureEntraIDProvider, | ||
| OpenAIProvider, | ||
| AnthropicProvider, | ||
| GeminiProvider, | ||
| OpenAICompatibleProvider, | ||
| # Add new providers here | ||
| ] | ||
|
|
||
| PROVIDER_REGISTRY = {} | ||
|
|
@@ -49,8 +50,9 @@ def _get_provider_by_index(idx: int) -> LLMProvider: | |
| Raises ValueError if index is out of range. | ||
| """ | ||
| if 1 <= idx <= len(_PROVIDER_CLASSES): | ||
| console.print("You selected provider:", _PROVIDER_CLASSES[idx - 1]().readable_name, style=f"bold {HELP_COLOR}") | ||
| return _PROVIDER_CLASSES[idx - 1]() | ||
| provider = _PROVIDER_CLASSES[idx - 1]() | ||
| console.print("You selected provider:", provider.readable_name, style=f"bold {HELP_COLOR}") | ||
| return provider | ||
| raise ValueError(f"Invalid provider index: {idx}") | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| # -------------------------------------------------------------------------------------------- | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. See License.txt in the project root for license information. | ||
| # -------------------------------------------------------------------------------------------- | ||
|
|
||
|
|
||
| from typing import Tuple | ||
|
|
||
| from .base import LLMProvider, is_valid_url, non_empty | ||
|
|
||
|
|
||
| def is_valid_api_base(v: str) -> bool: | ||
| if not v.startswith("https://"): | ||
| return False | ||
| return is_valid_url(v) | ||
|
|
||
|
|
||
| class AzureEntraIDProvider(LLMProvider): | ||
| @property | ||
| def readable_name(self) -> str: | ||
| return "Azure OpenAI (Microsoft Entra ID)" | ||
|
|
||
| @property | ||
| def model_route(self) -> str: | ||
| return "azure" | ||
|
|
||
| @property | ||
| def parameter_schema(self): | ||
| return { | ||
| "model": { | ||
| "secret": False, | ||
| "default": None, | ||
| "hint": "ensure your deployment name is the same as the model name, e.g., gpt-5", | ||
| "validator": non_empty, | ||
| "alias": "deployment_name" | ||
| }, | ||
| "api_base": { | ||
| "secret": False, | ||
| "default": None, | ||
| "validator": is_valid_api_base | ||
| }, | ||
| "api_version": { | ||
| "secret": False, | ||
| "default": "2025-04-01-preview", | ||
| "hint": None, | ||
| "validator": non_empty | ||
| } | ||
| } | ||
|
|
||
| def validate_connection(self, params: dict) -> Tuple[str, str]: | ||
| api_base = params.get("api_base") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unlike However, it would be helpful to add a comment here explaining why connection validation is skipped, so future maintainers don't mistake this for an incomplete implementation. def validate_connection(self, params: dict) -> Tuple[str, str]:
# Connection validation is skipped for Entra ID auth because the token
# is obtained at runtime via workload identity, not available at CLI config time.
...
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We cannot check validation for keyless auzre openai because it relies on azure role of the current user. |
||
| api_version = params.get("api_version") | ||
| deployment_name = params.get("model") | ||
|
|
||
| if not all([api_base, api_version, deployment_name]): | ||
| return "Missing required Azure parameters.", "retry_input" | ||
|
|
||
| return None, "save" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -179,6 +179,19 @@ def _setup_helm_deployment(console, aks_agent_manager: AKSAgentManager): | |
| f"\n👤 Current service account in namespace '{aks_agent_manager.namespace}': {service_account_name}", | ||
| style="cyan") | ||
|
|
||
| # Check if using Azure Entra ID provider and show role assignment reminder | ||
| model_list = aks_agent_manager.get_llm_config() | ||
| if model_list and any("azure/" in model_name and not model_config.get("api_key") for model_name, model_config in model_list.items()): | ||
| console.print( | ||
| f"\n⚠️ IMPORTANT: If using keyless authentication with Azure OpenAI, ensure the 'Cognitive Services OpenAI User' or 'Azure AI Developer' role " | ||
| f"is assigned to the workload identity (service account: {service_account_name}).", | ||
| style=f"bold {INFO_COLOR}" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This Entra ID detection + warning block is duplicated almost identically in the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This minor change can be included in a following PR. |
||
| ) | ||
| console.print( | ||
| "Learn more: https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity\n", | ||
| style=INFO_COLOR | ||
| ) | ||
|
|
||
| elif helm_status == "not_found": | ||
| console.print( | ||
| f"Helm chart not deployed (status: {helm_status}). Setting up deployment...", | ||
|
|
@@ -197,6 +210,19 @@ def _setup_helm_deployment(console, aks_agent_manager: AKSAgentManager): | |
| "'azure.workload.identity/client-id: <managed-identity-client-id>'.", | ||
| style=WARNING_COLOR) | ||
|
|
||
| # Check if using Azure Entra ID provider and show role assignment note | ||
| model_list = aks_agent_manager.get_llm_config() | ||
| if model_list and any("azure/" in model_name and not model_config.get("api_key") for model_name, model_config in model_list.items()): | ||
| console.print( | ||
| "\n⚠️ NOTE: You are using keyless authentication with Azure OpenAI. " | ||
| "Ensure the 'Cognitive Services OpenAI User' or 'Azure AI Developer' role is assigned to the workload identity.", | ||
| style=f"bold {INFO_COLOR}" | ||
| ) | ||
| console.print( | ||
| "Learn more: https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity", | ||
| style=INFO_COLOR | ||
| ) | ||
|
|
||
| # Prompt user for service account name (required) | ||
| while True: | ||
| user_input = console.input( | ||
|
|
@@ -422,6 +448,7 @@ def aks_agent_cleanup( | |
| cluster_name, | ||
| namespace, | ||
| mode=None, | ||
| yes=False, | ||
| ): | ||
| """Cleanup and uninstall the AKS agent.""" | ||
| with CLITelemetryClient(event_type="cleanup") as telemetry_client: | ||
|
|
@@ -442,16 +469,17 @@ def aks_agent_cleanup( | |
| f"⚠️ Warning: --namespace '{namespace}' is specified but will be ignored in client mode.", | ||
| style=WARNING_COLOR) | ||
|
|
||
| console.print( | ||
| "\n⚠️ Warning: This will uninstall the AKS agent and delete all associated resources.", | ||
| style=WARNING_COLOR) | ||
| if not yes: | ||
| console.print( | ||
| "\n⚠️ Warning: This will uninstall the AKS agent and delete all associated resources.", | ||
| style=WARNING_COLOR) | ||
|
|
||
| user_confirmation = console.input( | ||
| f"\n[{WARNING_COLOR}]Are you sure you want to proceed with cleanup? (y/N): [/]").strip().lower() | ||
| user_confirmation = console.input( | ||
| f"\n[{WARNING_COLOR}]Are you sure you want to proceed with cleanup? (y/N): [/]").strip().lower() | ||
|
|
||
| if user_confirmation not in ['y', 'yes']: | ||
| console.print("❌ Cleanup cancelled.", style=INFO_COLOR) | ||
| return | ||
| if user_confirmation not in ['y', 'yes']: | ||
| console.print("❌ Cleanup cancelled.", style=INFO_COLOR) | ||
| return | ||
|
|
||
| console.print("\n🗑️ Starting cleanup (this typically takes a few seconds)...", style=INFO_COLOR) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor: HISTORY.rst says "Bump aks-agent to v0.7.1" but the PR description says "v0.7.0". The code in
_consts.pymatches the changelog (v0.7.1). Please update the PR description to be consistent.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done