HumanSignal · robot-ci-heartex · Feb 11, 2025 · Feb 7, 2025 · Feb 10, 2025 · Feb 11, 2025
diff --git a/adala/runtimes/_litellm.py b/adala/runtimes/_litellm.py
@@ -163,8 +163,13 @@ def normalize_litellm_model_and_provider(model_name: str, provider: str):
     if "/" in model_name:
         model_name = model_name.split("/", 1)[1]
     provider = provider.lower()
+    # TODO: move this logic to LSE, this is the last place Adala needs to be updated when adding a provider connection
     if provider == "vertexai":
         provider = "vertex_ai"
+    if provider == "azureopenai":
+        provider = "azure"
+    if provider == "azureaifoundry":
+        provider = "azure_ai"
 
     return model_name, provider
 
@@ -578,17 +583,26 @@ def _get_prompt_tokens(string: str, model: str, output_fields: List[str]) -> int
 
     @staticmethod
     def _get_completion_tokens(
-        model: str, output_fields: Optional[List[str]], provider: str
+        candidate_model_names: List[str],
+        output_fields: Optional[List[str]],
+        provider: str,
     ) -> int:
-        model, provider = normalize_litellm_model_and_provider(model, provider)
-        max_tokens = litellm.get_model_info(
-            model=model, custom_llm_provider=provider
-        ).get("max_tokens", None)
-        if not max_tokens:
-            raise ValueError
-        # extremely rough heuristic, from testing on some anecdotal examples
-        n_outputs = len(output_fields) if output_fields else 1
-        return min(max_tokens, 4 * n_outputs)
+        for model in candidate_model_names:
+            model, provider = normalize_litellm_model_and_provider(model, provider)
+            try:
+                max_tokens = litellm.get_model_info(
+                    model=model, custom_llm_provider=provider
+                ).get("max_tokens", None)
+            except Exception as e:
+                if "model isn't mapped" in str(e):
+                    continue
+                else:
+                    raise e
+            if not max_tokens:
+                raise ValueError
+            # extremely rough heuristic, from testing on some anecdotal examples
+            n_outputs = len(output_fields) if output_fields else 1
+            return min(max_tokens, 4 * n_outputs)
 
     @classmethod
     def _estimate_cost(
@@ -599,12 +613,38 @@ def _estimate_cost(
         provider: str,
     ):
         prompt_tokens = cls._get_prompt_tokens(user_prompt, model, output_fields)
-        completion_tokens = cls._get_completion_tokens(model, output_fields, provider)
-        prompt_cost, completion_cost = litellm.cost_per_token(
-            model=model,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
+        # litellm.cost_per_token looks models up in the hardcoded dictionary litellm.model_cost, whose keys are case-sensitive and inconsistently cased.
+        # Example: 'azure_ai/deepseek-r1' vs 'azure_ai/Llama-3.3-70B-Instruct'
+        # So there is no way to determine the correct casing or to reliably fix it;
+        # the best we can do is also try the all-lowercase name.
+        candidate_model_names = [model, model.lower()]
+        # ...and Azure AI Foundry OpenAI models are not listed under "azure_ai/", but under the Azure OpenAI prefix "azure/"
+        if model.startswith("azure_ai/"):
+            candidate_model_names.append(model.replace("azure_ai/", "azure/"))
+            candidate_model_names.append(model.replace("azure_ai/", "azure/").lower())
+
+        completion_tokens = cls._get_completion_tokens(
+            candidate_model_names, output_fields, provider
         )
+
+        for candidate_model_name in candidate_model_names:
+            try:
+                prompt_cost, completion_cost = litellm.cost_per_token(
+                    model=candidate_model_name,
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                )
+                break
+            except Exception as e:
+                # litellm raises a bare Exception here, so there is no specific type to catch; we match on the message instead:
+                # Exception("This model isn't mapped yet. model=azure_ai/deepseek-R1, custom_llm_provider=azure_ai. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json.")
+                if "model isn't mapped" in str(e):
+                    prompt_cost, completion_cost = None, None
+                else:
+                    raise e
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(f"Model {model} for provider {provider} not found.")
+
         total_cost = prompt_cost + completion_cost
 
         return prompt_cost, completion_cost, total_cost
@@ -624,10 +664,25 @@ def get_cost_estimate(
             cumulative_prompt_cost = 0
             cumulative_completion_cost = 0
             cumulative_total_cost = 0
+            # for azure, we need the canonical model name, not the deployment name
+            if self.model.startswith("azure/"):
+                messages = [{"role": "user", "content": "Hey, how's it going?"}]
+                response = litellm.completion(
+                    messages=messages,
+                    model=self.model,
+                    max_tokens=10,
+                    temperature=self.temperature,
+                    seed=self.seed,
+                    # extra inference params passed to this runtime
+                    **self.model_extra,
+                )
+                model = "azure/" + response.model
+            else:
+                model = self.model
             for user_prompt in user_prompts:
                 prompt_cost, completion_cost, total_cost = self._estimate_cost(
                     user_prompt=user_prompt,
-                    model=self.model,
+                    model=model,
                     output_fields=output_fields,
                     provider=provider,
                 )

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,7 +42,7 @@ celery = {version = "^5.3.6", extras = ["redis"]}
 kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove when this fix will be included in celery
 uvicorn = "*"
 pydantic-settings = "^2.2.1"
-label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/6f23319b30c84ecf80b254e0a8bdb18c16d98219.zip"}
+label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/73e406721b62513d2b7c3962d8af9fd072caeff0.zip"}
 kafka-python-ng = "^2.2.3"
 requests = "^2.32.0"
 # Using litellm from forked repo until vertex fix is released: https://github.com/BerriAI/litellm/issues/7904

diff --git a/server/app.py b/server/app.py
@@ -246,6 +246,7 @@ async def submit_batch(batch: BatchData):
 
 @app.post("/validate-connection", response_model=Response[ValidateConnectionResponse])
 async def validate_connection(request: ValidateConnectionRequest):
+    # TODO: move this logic to LSE, this is the last place Adala needs to be updated when adding a provider connection
     multi_model_provider_test_models = {
         "openai": "gpt-4o-mini",
         "vertexai": "vertex_ai/gemini-1.5-flash",
@@ -290,6 +291,9 @@ async def validate_connection(request: ValidateConnectionRequest):
         if provider.lower() == "azureopenai":
             model = "azure/" + request.deployment_name
             model_extra = {"base_url": request.endpoint}
+        elif provider.lower() == "azureaifoundry":
+            model = "azure_ai/" + request.deployment_name
+            model_extra = {"base_url": request.endpoint}
         elif provider.lower() == "custom":
             model = "openai/" + request.deployment_name
             model_extra = (