
Commit

v3.12.0:
- Added a "prompt" attribute to LLM response strings, referencing the prompt used to generate them (can be disabled with the SAVE_MEMORY configuration option).
- Enhanced dicts returned by LLMResponse.parse_json() to include a new "llm_response" attribute, referencing the original LLM response string (dict content remains unaffected); a usage sketch follows this list.
- Updated Role.<NAME> and ApiType.<NAME> values to be members of their respective str-based Enums rather than plain strings.
- Improved type definitions and type hints across the codebase.
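
A minimal usage sketch of the new attributes, assuming an OpenAI-compatible setup; the model name, prompt text, and API key handling are illustrative and not part of this commit:

import microcore as mc

# Illustrative configuration; the API key is assumed to come from the
# environment (e.g. LLM_API_KEY).
mc.configure(LLM_API_TYPE="open_ai", MODEL="gpt-4o-mini")

response = mc.llm('Respond with JSON: {"status": "ok"}')
print(response.prompt)        # new: the prompt used to generate this response
print(response.gen_duration)  # generation time in seconds

data = response.parse_json()
# When the parsed value is a dict, it now carries a reference back to the
# original LLM response string.
assert data.llm_response is response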
Nayjest committed Nov 22, 2024
1 parent 35106fb commit c4c87fd
Showing 15 changed files with 87 additions and 35 deletions.
1 change: 1 addition & 0 deletions examples/code-review-tool/app.py
@@ -12,6 +12,7 @@
python app.py <full-path-to-patch-file>
"""

import json
import sys
from pathlib import Path
2 changes: 1 addition & 1 deletion microcore/__init__.py
@@ -150,4 +150,4 @@ def has_content(self, collection: str) -> bool:
# "wrappers",
]

__version__ = "3.11.1"
__version__ = "3.12.0"
12 changes: 7 additions & 5 deletions microcore/_env.py
@@ -1,15 +1,18 @@
import os.path
from dataclasses import dataclass, field, asdict, fields
from importlib.util import find_spec
from typing import TYPE_CHECKING

import jinja2

from .embedding_db import AbstractEmbeddingDB
from .configuration import Config, ApiType, LLMConfigError
from . import AbstractEmbeddingDB
from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
from .templating.jinja2 import make_jinja2_env, make_tpl_function
from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
from .llm.local_llm import make_llm_functions as make_local_llm_functions

if TYPE_CHECKING:
from .wrappers.llm_response_wrapper import LLMResponse # noqa: F401

@dataclass
class Env:
@@ -46,11 +49,10 @@ def init_templating(self):

def init_llm(self):
if self.config.LLM_API_TYPE == ApiType.NONE:

def not_configured(*args, **kwargs):
def not_configured(*args, **kwargs) -> "LLMResponse":
raise LLMConfigError("Language model is not configured")

async def a_not_configured(*args, **kwargs):
async def a_not_configured(*args, **kwargs) -> "LLMResponse":
raise LLMConfigError("Language model is not configured")

self.llm_function, self.llm_async_function = (
25 changes: 16 additions & 9 deletions microcore/_llm_functions.py
@@ -1,17 +1,17 @@
from datetime import datetime

from .message_types import Msg
from .utils import run_parallel
from .wrappers.llm_response_wrapper import LLMResponse
from .types import TPrompt
from ._env import env


def llm(prompt: str | Msg | list[str] | list[Msg], **kwargs) -> str | LLMResponse:
def llm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
"""
Request Large Language Model synchronously
Args:
prompt (str | list[str]): Text to send to LLM
prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM
**kwargs (dict): Parameters supported by the LLM API
See parameters supported by the OpenAI:
@@ -43,20 +43,20 @@ def llm(prompt: str | Msg | list[str] | list[Msg], **kwargs) -> str | LLMRespons
response = env().llm_function(prompt, **kwargs)
try:
response.gen_duration = (datetime.now() - start).total_seconds()
if not env().config.SAVE_MEMORY:
response.prompt = prompt
except AttributeError:
...
[h(response) for h in env().llm_after_handlers]
return response


async def allm(
prompt: str | Msg | list[str] | list[Msg], **kwargs
) -> str | LLMResponse:
async def allm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
"""
Request Large Language Model asynchronously
Args:
prompt (str | list[str]): Text to send to LLM
prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM
**kwargs (dict): Parameters supported by the LLM API
See parameters supported by the OpenAI:
@@ -90,15 +90,22 @@ async def allm(
response = await env().llm_async_function(prompt, **kwargs)
try:
response.gen_duration = (datetime.now() - start).total_seconds()
if not env().config.SAVE_MEMORY:
response.prompt = prompt
except AttributeError:
...
[h(response) for h in env().llm_after_handlers]
return response


async def llm_parallel(
prompts: list, max_concurrent_tasks: int = None, **kwargs
) -> list[str] | list[LLMResponse]:
prompts: list[TPrompt], max_concurrent_tasks: int = None, **kwargs
) -> list[str | LLMResponse]:
"""
Execute multiple LLM requests in parallel
Returns (list[LLMResponse | str]): a list of responses in the same order as the prompts
"""
tasks = [allm(prompt, **kwargs) for prompt in prompts]

if max_concurrent_tasks is None:
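
A sketch of llm_parallel with the broadened prompt typing; the prompts are illustrative, and llm_parallel is assumed to be re-exported at package level:

import asyncio
import microcore as mc
from microcore.message_types import Msg

async def main():
    prompts = [
        "Name one prime number.",                        # plain string
        Msg(role="user", content="Name a color."),       # Msg dataclass
        {"role": "user", "content": "Name a planet."},   # raw dict
    ]
    # Responses come back in the same order as the prompts.
    responses = await mc.llm_parallel(prompts, max_concurrent_tasks=2)
    for r in responses:
        print(r.prompt, "->", str(r))

asyncio.run(main())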
5 changes: 3 additions & 2 deletions microcore/_prepare_llm_args.py
@@ -1,6 +1,7 @@
from dataclasses import asdict

from .message_types import DEFAULT_MESSAGE_ROLE, Msg
from .types import TPrompt


def prepare_prompt(prompt) -> str:
@@ -13,9 +14,9 @@ def prepare_prompt(prompt) -> str:
)


def prepare_chat_messages(prompt) -> list[dict]:
def prepare_chat_messages(prompt: TPrompt) -> list[dict]:
"""Converts prompt to messages for LLM chat API (OpenAI)"""
messages = prompt if isinstance(prompt, list) else [prompt]
messages: list = prompt if isinstance(prompt, list) else [prompt]
return [
(
dict(role=DEFAULT_MESSAGE_ROLE, content=msg)
13 changes: 9 additions & 4 deletions microcore/configuration.py
Expand Up @@ -2,6 +2,7 @@
import logging
import os
from dataclasses import dataclass, field, fields
from enum import Enum
from pathlib import Path
from typing import Any, Union, Callable

@@ -31,9 +32,7 @@ def get_bool_from_env(env_var: str, default: bool | None = False) -> bool | None


def get_object_from_env(env_var: str, dtype: type, default: Any = None):
val_from_env = os.getenv( # pylint: disable=W1508
env_var, _MISSING
)
val_from_env = os.getenv(env_var, _MISSING) # pylint: disable=W1508
if isinstance(val_from_env, str):
val_from_env = val_from_env.strip()
if val_from_env:
@@ -56,7 +55,7 @@ def get_object_from_env(env_var: str, dtype: type, default: Any = None):
return val_from_env


class ApiType:
class ApiType(str, Enum):
"""LLM API types"""

OPEN_AI = "open_ai"
@@ -372,6 +371,12 @@ class Config(LLMConfig):

MAX_CONCURRENT_TASKS: int = from_env(default=None)

SAVE_MEMORY: bool = from_env(dtype=bool, default=False)
"""
When enabled, some auxiliary data is not collected:
- LLMResponse objects will not keep a reference to the prompt
"""

def __post_init__(self):
super().__post_init__()
if self.TEXT_TO_SPEECH_PATH is None:
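
A sketch of how the new SAVE_MEMORY flag interacts with the prompt attribute added in this release; the configuration values are illustrative:

import microcore as mc

# With SAVE_MEMORY enabled, llm() skips attaching the prompt to the response,
# so the attribute keeps its default value of None.
mc.configure(LLM_API_TYPE="open_ai", MODEL="gpt-4o-mini", SAVE_MEMORY=True)

response = mc.llm("Summarize this in one word: memory savings")
assert response.prompt is None  # prompt reference not retained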
2 changes: 1 addition & 1 deletion microcore/json_parsing.py
@@ -117,7 +117,7 @@ def between_lines(pattern):

try:
# Python-style values instead of JSON (inside fields)
mapping = {"False": "false", "True":"true", "None": "null"}
mapping = {"False": "false", "True": "true", "None": "null"}
for pythonic, jsonic in mapping.items():
s = re.sub(rf"\"\:\s*{pythonic}(?=\s*[\,\}}])", f"\": {jsonic}", s)
return json.dumps(json.loads(s), indent=4)
3 changes: 2 additions & 1 deletion microcore/message_types.py
@@ -1,9 +1,10 @@
""" Message classes for OpenAI Chat API """

from enum import Enum
from dataclasses import dataclass, field


class Role:
class Role(str, Enum):
SYSTEM = "system"
USER = "user"
ASSISTANT = "assistant"
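
Because Role (and ApiType in configuration.py) now subclasses both str and Enum, code that compares these values against plain strings should keep working; a small standalone sketch of the assumed behavior:

from enum import Enum


class Role(str, Enum):  # mirrors the change above
    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"


# The str mix-in keeps existing string comparisons working...
assert Role.ASSISTANT == "assistant"
assert dict(role=Role.USER.value, content="hi")["role"] == "user"
# ...while Enum features such as iteration become available.
assert [r.value for r in Role] == ["system", "user", "assistant"]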
3 changes: 2 additions & 1 deletion microcore/tokenizing.py
@@ -5,7 +5,8 @@
from ._env import env


class CantLoadTikTokenEncoding(RuntimeError): ...
class CantLoadTikTokenEncoding(RuntimeError):
...


def _resolve_tiktoken_encoding(
12 changes: 9 additions & 3 deletions microcore/types.py
@@ -1,11 +1,17 @@
from typing import Callable, Any, Awaitable, Union
from typing import TYPE_CHECKING, Callable, Any, Awaitable, Union, List
from os import PathLike

from .message_types import Msg
if TYPE_CHECKING:
from .wrappers.prompt_wrapper import PromptWrapper # noqa: F401

TPrompt = Union[dict, Msg, str, 'PromptWrapper', List[Union[dict, Msg, str, 'PromptWrapper']]]
"""Type for prompt argument in LLM requests"""
TplFunctionType = Callable[[Union[PathLike[str], str], Any], str]
"""Function type for rendering prompt templates"""
LLMFunctionType = Callable[[str, Any], str]
LLMFunctionType = Callable[[TPrompt, Any], str]
"""Function type for requesting LLM synchronously"""
LLMAsyncFunctionType = Callable[[str, Any], Awaitable[str]]
LLMAsyncFunctionType = Callable[[TPrompt, Any], Awaitable[str]]
"""Function type for requesting LLM asynchronously"""


3 changes: 1 addition & 2 deletions microcore/ui.py
@@ -60,8 +60,7 @@ def ask_non_empty(msg):
i = input(msg)
if i.strip():
break
else:
error("Empty input")
error("Empty input")
return i


3 changes: 3 additions & 0 deletions microcore/utils.py
@@ -212,6 +212,9 @@ def is_google_colab() -> bool:


def get_vram_usage(as_string=True, color=Fore.GREEN):
"""
Returns GPU VRAM usage as a string or a list of objects.
"""
@dataclass
class _MemUsage:
name: str
24 changes: 21 additions & 3 deletions microcore/wrappers/llm_response_wrapper.py
@@ -1,11 +1,19 @@
from typing import Any

from ..types import BadAIAnswer
from ..types import BadAIAnswer, TPrompt
from ..json_parsing import parse_json
from ..utils import ExtendedString, ConvertableToMessage, extract_number
from ..message_types import Role, AssistantMsg


class DictFromLLMResponse(dict):
llm_response: "LLMResponse"

def from_llm_response(self, llm_response: "LLMResponse"):
self.llm_response = llm_response
return self


class LLMResponse(ExtendedString, ConvertableToMessage):
"""
Response from the Large Language Model.
@@ -20,11 +28,17 @@ class LLMResponse(ExtendedString, ConvertableToMessage):
- https://platform.openai.com/docs/api-reference/chat/object
"""

role: Role
content: str
prompt: TPrompt
gen_duration: float

def __new__(cls, string: str, attrs: dict = None):
attrs = {
**(attrs or {}),
"role": Role.ASSISTANT,
"content": str(string),
"prompt": None,
# generation duration in seconds (float), used in metrics
"gen_duration": None,
}
@@ -33,8 +47,12 @@ def __new__(cls, string: str, attrs: dict = None):

def parse_json(
self, raise_errors: bool = True, required_fields: list[str] = None
) -> list | dict | float | int | str:
return parse_json(self.content, raise_errors, required_fields)
) -> list | dict | float | int | str | DictFromLLMResponse:
res = parse_json(self.content, raise_errors, required_fields)
if isinstance(res, dict):
res = DictFromLLMResponse(res)
res.llm_response = self
return res

def parse_number(
self,
12 changes: 10 additions & 2 deletions microcore/wrappers/prompt_wrapper.py
@@ -1,15 +1,23 @@
from typing import TYPE_CHECKING, Union

from .._llm_functions import allm, llm
from ..utils import ExtendedString, ConvertableToMessage
if TYPE_CHECKING:
from .wrappers.llm_response_wrapper import LLMResponse # noqa: F401


class PromptWrapper(ExtendedString, ConvertableToMessage):
def to_llm(self, **kwargs):
"""
A utility class that wraps a prompt string and extends it with convenience
methods such as sending itself to the LLM.
"""
def to_llm(self, **kwargs) -> Union[str, "LLMResponse"]:
"""
Send prompt to Large Language Model, see `llm`
"""
return llm(self, **kwargs)

async def to_allm(self, **kwargs):
async def to_allm(self, **kwargs) -> Union[str, "LLMResponse"]:
"""
Send prompt to Large Language Model asynchronously, see `allm`
"""
2 changes: 1 addition & 1 deletion tests/apis/setup_env.py
@@ -19,7 +19,7 @@ def setup_env(request):
microcore.configure(
USE_DOT_ENV=True,
DOT_ENV_FILE=request.param,
LLM_DEFAULT_ARGS=dict(temperature=0.01)
LLM_DEFAULT_ARGS=dict(temperature=0.01),
)
yield
os.environ.clear()
