
Commit e8f1d62

Merge branch 'main' into jal/simple-hf-lock
2 parents: a50fd16 + 2120112
File tree

19 files changed: +177 / -84 lines


.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions

@@ -7,11 +7,11 @@ repos:
       - id: ruff-format
         name: "Ruff formatter"
         args: [--config=pyproject.toml]
-        files: '^(mellea|tests|cli|docs).*\.(py|ipynb)$'
+        files: '^(mellea|test|cli|docs).*\.(py|ipynb)$'
       - id: ruff
         name: "Ruff linter"
         args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
-        files: '^(mellea|tests).*\.(py|ipynb)$'
+        files: '^(mellea).*\.(py|ipynb)$'

   - repo: local
     hooks:
@@ -20,7 +20,7 @@ repos:
         entry: uv run --no-sync mypy mellea
         pass_filenames: false
         language: system
-        files: '\.py$'
+        files: '^(mellea|test|cli|docs).*\.(py|ipynb)$'

   - repo: https://github.com/astral-sh/uv-pre-commit
     rev: 0.7.8
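
As a quick illustration of what the updated `files` pattern matches, here is a small standalone sketch using Python's `re` module. The candidate paths are illustrative examples only, chosen to show which top-level directories pass the filter under the pattern taken from this diff.

    import re

    # Pattern taken from the updated pre-commit config above.
    pattern = re.compile(r"^(mellea|test|cli|docs).*\.(py|ipynb)$")

    candidates = [
        "mellea/backends/openai.py",          # matches: starts with 'mellea', ends in .py
        "test/backends/test_huggingface.py",  # matches: prefix is now 'test', not 'tests'
        "docs/examples/demo.ipynb",           # matches: notebooks are included
        "scripts/build.py",                   # no match: prefix not in the alternation
    ]

    for path in candidates:
        print(path, bool(pattern.match(path)))

Note that the ruff linter hook is now scoped to `mellea` only, while the formatter and mypy hooks use the broader `mellea|test|cli|docs` prefix list.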

mellea/backends/adapters/adapter.py

Lines changed: 1 addition & 2 deletions

@@ -2,11 +2,10 @@

 import abc
 import pathlib
-from typing import Any, TypeVar
+from typing import TypeVar

 import granite_common.intrinsics
 import yaml
-from litellm import cast

 from mellea.backends import Backend
 from mellea.backends.adapters.catalog import AdapterType, fetch_intrinsic_metadata

mellea/backends/huggingface.py

Lines changed: 11 additions & 0 deletions

@@ -281,6 +281,11 @@ async def _generate_from_intrinsic(
         if not ctx.is_chat_context:
             raise Exception("Does not yet support non-chat contexts.")

+        if len(model_options.items()) > 0:
+            FancyLogger.get_logger().info(
+                "passing in model options when generating with an adapter; some model options may be overwritten / ignored"
+            )
+
         linearized_ctx = ctx.view_for_generation()
         assert linearized_ctx is not None, (
             "If ctx.is_chat_context, then the context should be linearizable."
@@ -341,6 +346,12 @@ async def _generate_from_intrinsic(
             "messages": conversation,
             "extra_body": {"documents": docs},
         }
+
+        # Convert other parameters from Mellea proprietary format to standard format.
+        for model_option in model_options:
+            if model_option == ModelOption.TEMPERATURE:
+                request_json["temperature"] = model_options[model_option]
+
         rewritten = rewriter.transform(request_json, **action.intrinsic_kwargs)

         # TODO: Handle caching here. granite_common doesn't tell us what changed,
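
To see the new conversion step in isolation, here is a minimal sketch of the same loop with a stand-in options dict and request payload; only the `ModelOption.TEMPERATURE` handling mirrors the diff, and the payload contents are placeholders.

    from mellea.backends.types import ModelOption

    # Hypothetical inputs: options supplied by the caller and the request body
    # that will later be handed to the rewriter.
    model_options = {ModelOption.TEMPERATURE: 0.0, "unknown_option": "left alone"}
    request_json = {"model": "placeholder-model", "messages": []}

    # Copy only the options that have a known standard equivalent; anything else
    # may be overwritten or ignored, per the info message added above.
    for model_option in model_options:
        if model_option == ModelOption.TEMPERATURE:
            request_json["temperature"] = model_options[model_option]

    print(request_json)  # {'model': 'placeholder-model', 'messages': [], 'temperature': 0.0}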

mellea/backends/litellm.py

Lines changed: 2 additions & 2 deletions

@@ -54,12 +54,12 @@ def __init__(
         base_url: str | None = "http://localhost:11434",
         model_options: dict | None = None,
     ):
-        """Initialize and OpenAI compatible backend. For any additional kwargs that you need to pass the the client, pass them as a part of **kwargs.
+        """Initialize an OpenAI compatible backend using the [LiteLLM Python SDK](https://docs.litellm.ai/docs/#litellm-python-sdk).

         Note: If getting `Unclosed client session`, set `export DISABLE_AIOHTTP_TRANSPORT=True` in your environment. See: https://github.com/BerriAI/litellm/issues/13251.

         Args:
-            model_id : The LiteLLM model identifier. Make sure that all necessary credentials are in OS environment variables.
+            model_id : The LiteLLM model identifier; in most cases requires some combination of `<provider>/<model_creator>/<model_name>`. Make sure that all necessary credentials are in OS environment variables.
             formatter: A custom formatter based on backend.If None, defaults to TemplateFormatter
             base_url : Base url for LLM API. Defaults to None.
             model_options : Generation options to pass to the LLM. Defaults to None.
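
For reference, a usage sketch of the constructor this docstring documents. The model id and endpoint below are placeholders (an Ollama-served Granite model is assumed); substitute your own provider-prefixed LiteLLM identifier and make sure any credentials LiteLLM needs are exported as environment variables.

    from mellea.backends.litellm import LiteLLMBackend

    # Assumed example values; the keyword arguments mirror how the backend is
    # constructed in test/backends/test_litellm_ollama.py in this same commit.
    backend = LiteLLMBackend(
        model_id="ollama/granite3.3:8b",                      # provider-prefixed LiteLLM model id (placeholder)
        base_url="http://localhost:11434",                    # local Ollama endpoint (placeholder)
        model_options={"api_base": "http://localhost:11434"},
    )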

mellea/backends/openai.py

Lines changed: 6 additions & 0 deletions

@@ -435,6 +435,12 @@ async def _generate_from_intrinsic(
             "extra_body": {"documents": docs},
         }

+        # Convert other parameters from Mellea proprietary format to standard format.
+        if model_options is not None:
+            for model_option in model_options:
+                if model_option == ModelOption.TEMPERATURE:
+                    request_json["temperature"] = model_options[model_option]
+
         rewritten = rewriter.transform(request_json, **action.intrinsic_kwargs)

         self.load_adapter(adapter.qualified_name)

mellea/stdlib/intrinsics/rag.py

Lines changed: 3 additions & 1 deletion

@@ -9,6 +9,7 @@
     AdapterType,
     GraniteCommonAdapter,
 )
+from mellea.backends.types import ModelOption
 from mellea.stdlib.base import ChatContext, Document
 from mellea.stdlib.chat import Message
 from mellea.stdlib.intrinsics.intrinsic import Intrinsic
@@ -63,6 +64,7 @@ def _call_intrinsic(
         intrinsic,
         context,
         backend,
+        model_options={ModelOption.TEMPERATURE: 0.0},
         # No rejection sampling, please
         strategy=None,
     )
@@ -277,7 +279,7 @@ def rewrite_answer_for_relevance(
         backend,
         kwargs={
             "answer_relevance_category": result_json["answer_relevance_category"],
-            "answer_relevance_analysis": result_json["answer_relevance_category"],
+            "answer_relevance_analysis": result_json["answer_relevance_analysis"],
             "correction_method": correction_method,
         },
     )

mellea/templates/prompts/default/LLMaJRequirement.jinja2

Lines changed: 0 additions & 15 deletions
This file was deleted.

test/backends/test_huggingface.py

Lines changed: 15 additions & 5 deletions

@@ -17,13 +17,22 @@
 from mellea.backends.formatter import TemplateFormatter
 from mellea.backends.huggingface import LocalHFBackend, _assert_correct_adapters
 from mellea.backends.types import ModelOption
-from mellea.stdlib.base import (CBlock, ChatContext, Context, ModelOutputThunk,
-                                SimpleContext)
+from mellea.stdlib.base import (
+    CBlock,
+    ChatContext,
+    Context,
+    ModelOutputThunk,
+    SimpleContext,
+)
 from mellea.stdlib.chat import Message
 from mellea.stdlib.intrinsics.intrinsic import Intrinsic
-from mellea.stdlib.requirement import (ALoraRequirement, LLMaJRequirement,
-                                       Requirement, ValidationResult,
-                                       default_output_to_bool)
+from mellea.stdlib.requirement import (
+    ALoraRequirement,
+    LLMaJRequirement,
+    Requirement,
+    ValidationResult,
+    default_output_to_bool,
+)


 @pytest.fixture(scope="module")
@@ -54,6 +63,7 @@ def session(backend):
     yield session
     session.reset()

+
 @pytest.mark.qualitative
 def test_adapters(backend):
     assert len(backend._added_adapters.items()) > 0

test/backends/test_litellm_ollama.py

Lines changed: 5 additions & 10 deletions

@@ -26,9 +26,7 @@ def backend(gh_run: int):
         url = url.replace("127.0.0.1", "http://localhost")

         return LiteLLMBackend(
-            model_id=_MODEL_ID,
-            base_url=url,
-            model_options={"api_base": url},
+            model_id=_MODEL_ID, base_url=url, model_options={"api_base": url}
         )
     else:
         return LiteLLMBackend(model_id=_MODEL_ID)
@@ -111,12 +109,6 @@ def test_litellm_ollama_instruct_options(session):
         ModelOption.SEED: 123,
         ModelOption.TEMPERATURE: 0.5,
         ModelOption.MAX_NEW_TOKENS: 100,
-
-        # Ollama thinking controls currently broken on Granite; see
-        # https://github.com/ollama/ollama/issues/10983
-        # TODO: Re-enable when this upstream bug gets fixed.
-        #ModelOption.THINKING: True,
-        #"reasoning_effort": True,
         "homer_simpson": "option should be kicked out",
     }

@@ -144,6 +136,7 @@ def is_happy(text: str) -> bool:
     # should yield to true - but, of course, is model dependent
     assert h is True

+
 async def test_generate_from_raw(session):
     prompts = [
         "what is 1+1?",
@@ -157,7 +150,9 @@ async def test_generate_from_raw(session):
         actions=[CBlock(value=prompt) for prompt in prompts], ctx=session.ctx
     )

-    assert len(results) == 1, "ollama doesn't support batching; litellm should send a single message containing all prompts"
+    assert len(results) == 1, (
+        "ollama doesn't support batching; litellm should send a single message containing all prompts"
+    )
     assert results[0].value is not None

test/backends/test_litellm_watsonx.py

Lines changed: 4 additions & 7 deletions

@@ -41,18 +41,15 @@ def test_multiple_sync_funcs(session):

 @pytest.mark.qualitative
 async def test_generate_from_raw(session):
-    prompts = [
-        "what is 1+1?",
-        "what is 2+2?",
-        "what is 3+3?",
-        "what is 4+2+2?",
-    ]
+    prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+2+2?"]

     results = await session.backend.generate_from_raw(
         actions=[CBlock(value=prompt) for prompt in prompts], ctx=session.ctx
     )

-    assert len(results) == 1, "litellm converts a batch request for watsonx into a single message"
+    assert len(results) == 1, (
+        "litellm converts a batch request for watsonx into a single message"
+    )
     assert results[0].value is not None
