Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions src/evidently/core/datasets.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import abc
import contextlib
import copy
import dataclasses
from abc import abstractmethod
from enum import Enum
from typing import TYPE_CHECKING
from typing import Any
from typing import ClassVar
from typing import Dict
Expand Down Expand Up @@ -30,6 +32,9 @@
from evidently.pydantic_utils import AutoAliasMixin
from evidently.pydantic_utils import EvidentlyBaseModel

if TYPE_CHECKING:
from evidently.llm.prompts import PromptRenderContext


class ColumnRole(Enum):
Unset = "Unset"
Expand Down Expand Up @@ -581,6 +586,7 @@ class DatasetStats:

class Dataset:
_data_definition: DataDefinition
_prompt_render_context_value: "PromptRenderContext"

@classmethod
def from_pandas(
Expand All @@ -590,7 +596,10 @@ def from_pandas(
descriptors: Optional[List[Descriptor]] = None,
options: AnyOptions = None,
) -> "Dataset":
from evidently.llm.prompts.reference import PromptRenderContext

dataset = PandasDataset(data, data_definition)
dataset._prompt_render_context_value = PromptRenderContext(Options.from_any_options(options))
if descriptors is not None:
dataset.add_descriptors(descriptors, options)
return dataset
Expand Down Expand Up @@ -631,6 +640,13 @@ def add_descriptors(self, descriptors: List[Descriptor], options: AnyOptions = N
for descriptor in descriptors:
self.add_descriptor(descriptor, options)

@contextlib.contextmanager
def _prompt_render_context(self):
    """Context manager scoping this dataset's prompt-render options.

    Delegates to ``set_prompt_render_context`` so any LLM prompt rendering
    performed while the context is active sees
    ``self._prompt_render_context_value``.
    """
    # Local import — presumably to avoid an import cycle between the core
    # dataset module and the llm subpackage; confirm.
    from evidently.llm.prompts.reference import set_prompt_render_context

    # NOTE(review): `_prompt_render_context_value` is only assigned in
    # `from_pandas` in this diff — other construction paths may raise
    # AttributeError here; confirm before relying on this elsewhere.
    with set_prompt_render_context(self._prompt_render_context_value) as ctx:
        yield ctx


INTEGER_CARDINALITY_LIMIT = 10

Expand Down Expand Up @@ -801,8 +817,9 @@ def add_column(self, key: str, data: DatasetColumn):
self._data_definition.categorical_descriptors.append(key)

def add_descriptor(self, descriptor: Descriptor, options: AnyOptions = None):
descriptor.validate_input(self._data_definition)
new_columns = descriptor.generate_data(self, Options.from_any_options(options))
with self._prompt_render_context():
descriptor.validate_input(self._data_definition)
new_columns = descriptor.generate_data(self, Options.from_any_options(options))
if isinstance(new_columns, DatasetColumn):
new_columns = {descriptor.alias: new_columns}
for col, value in new_columns.items():
Expand Down
Empty file.
101 changes: 101 additions & 0 deletions src/evidently/llm/prompts/blocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import re
from abc import abstractmethod
from typing import Any
from typing import ClassVar
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union

from evidently.llm.prompts.schema import JsonLLMResponseSchema
from evidently.llm.prompts.schema import LLMResponseSchema
from evidently.llm.prompts.utils import partial_format
from evidently.pydantic_utils import AutoAliasMixin
from evidently.pydantic_utils import EvidentlyBaseModel

Placeholder = str # potentially tuple[name, type]


class PromptBlock(AutoAliasMixin, EvidentlyBaseModel):
    """Base class for composable pieces of an LLM prompt.

    A block knows which ``{placeholder}`` names it contains and how to render
    itself to text; blocks that dictate the answer format may additionally
    expose a response schema.
    """

    __alias_type__: ClassVar[str] = "prompt_block_2"

    class Config:
        is_base_type = True

    @classmethod
    def simple(cls, content: str) -> "PromptBlock":
        """Convenience constructor wrapping plain template text."""
        block = SimplePromptBlock(content=content)
        return block

    @abstractmethod
    def list_placeholders(self) -> List[Placeholder]:
        """Return the placeholder names this block expects to be filled in."""
        raise NotImplementedError

    @abstractmethod
    def render(self, values: Optional[Dict[str, Any]] = None) -> str:
        """Render the block to text, substituting ``values`` when given."""
        raise NotImplementedError

    def get_response_schema(self) -> Optional[LLMResponseSchema]:
        """Schema of the expected LLM answer; None when the block imposes none."""
        return None


placeholders_re = re.compile(r"\{([a-zA-Z0-9_]+)}")


class SimplePromptBlock(PromptBlock):
    """Prompt block holding raw template text with ``{placeholder}`` markers."""

    content: str

    def list_placeholders(self) -> List[Placeholder]:
        matches = placeholders_re.findall(self.content)
        return list(matches)

    def render(self, values: Optional[Dict[str, Any]] = None) -> str:
        # Nothing to substitute: return the template text untouched.
        if values is None:
            return self.content
        return partial_format(self.content, values)


class JsonOutputBlock(PromptBlock):
    """Prompt block instructing the LLM to answer with a flat JSON object.

    ``fields`` maps a field name to its description, or to a
    ``(description, json_key)`` tuple when the key used in the JSON answer
    should differ from the field name.
    """

    fields: Dict[str, Union[Tuple[str, str], str]]
    # NOTE(review): declared but not read in this block — presumably consumed
    # by the response parser; confirm.
    search_for_substring: bool = True

    def _field_items(self) -> List[Tuple[str, str, str]]:
        """Normalize ``fields`` into (field_name, json_key, description) triples."""
        items: List[Tuple[str, str, str]] = []
        for field, descr in self.fields.items():
            if isinstance(descr, tuple):
                descr, field_key = descr
            else:
                field_key = field
            items.append((field, field_key, descr))
        return items

    def _render(self) -> str:
        """Build the instruction text with a doubly-braced JSON example."""
        items = self._field_items()
        values = [field for field, _, _ in items]
        example_rows = [f'"{field_key}": "{descr}"' for _, field_key, descr in items]
        example_rows_str = "\n".join(example_rows)
        # `{{{{` / `}}}}` render to literal `{{` / `}}` so a later formatting
        # pass can reduce them to single braces.
        return f"Return {', '.join(values)} formatted as json without formatting as follows:\n{{{{\n{example_rows_str}\n}}}}"

    def list_placeholders(self) -> List[Placeholder]:
        return list(placeholders_re.findall(self._render()))

    def render(self, values: Optional[Dict[str, Any]] = None) -> str:
        rendered = self._render()
        if values is None:
            # Fix: consistent with SimplePromptBlock — do not hand None to
            # partial_format when there is nothing to substitute.
            return rendered
        return partial_format(rendered, values)

    def get_response_schema(self) -> Optional[LLMResponseSchema]:
        # Fix: key the schema by json_key (not the field name) so it matches
        # the keys shown in the rendered example for tuple-valued fields.
        schema = {field_key: {"type": "string"} for _, field_key, _ in self._field_items()}
        return JsonLLMResponseSchema(json_schema=schema)


# class ClassificationPromptBlock(PromptBlock):
# where: str = ""
# what: str = "text"
# into_what: str = "categories"
# categories: List[str]
#
# def render(self, values: Optional[Dict[str, Any]] = None) -> str:
# if len(self.categories) < 2:
# raise ValueError("should be at least 2 categories")
# cats = ", ".join(self.categories[:-1]) + " and " + self.categories[-1]
# cont = f"Classify {self.what} {self.where} "\
# f"into {len(self.categories)} {self.into_what}: {cats}."
# return cont
#
# def list_placeholders(self) -> List[Placeholder]:
# return []
94 changes: 94 additions & 0 deletions src/evidently/llm/prompts/content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple

from evidently import ColumnType
from evidently._pydantic_compat import PrivateAttr
from evidently.legacy.features.llm_judge import BaseLLMPromptTemplate
from evidently.legacy.utils.llm.prompts import JsonOutputFormatBlock
from evidently.legacy.utils.llm.prompts import OutputFormatBlock
from evidently.legacy.utils.llm.prompts import PromptBlock as PromptBlockV1
from evidently.legacy.utils.llm.prompts import PromptTemplate
from evidently.llm.prompts.blocks import Placeholder
from evidently.llm.prompts.blocks import PromptBlock
from evidently.llm.prompts.models import OutputSchema
from evidently.llm.prompts.models import PromptContent
from evidently.llm.prompts.schema import JsonLLMResponseSchema
from evidently.llm.prompts.schema import LLMResponseSchema
from evidently.llm.prompts.schema import PlainLLMResponseSchema


def get_response_schema(blocks: List[PromptBlock]) -> LLMResponseSchema:
    """Return the response schema declared by the first block that has one.

    Falls back to a plain-text schema when no block declares a schema.
    """
    for block in blocks:
        # Call get_response_schema() once per block — the original generator
        # evaluated it twice (once in the filter, once as the value).
        schema = block.get_response_schema()
        if schema is not None:
            return schema
    return PlainLLMResponseSchema()


class GenericLLMPromptTemplate(BaseLLMPromptTemplate):
    """Adapts a list of v2 ``PromptBlock``s to the legacy v1
    ``BaseLLMPromptTemplate`` interface.

    Only block access and response-schema derivation are implemented; the
    column-introspection methods are intentionally left unimplemented.
    """

    class Config:
        type_alias = "evidently:prompt_template:GenericLLMPromptTemplate"

    blocks: List[PromptBlock]
    # Lazily computed cache: PrivateAttr() without a default raises
    # AttributeError until first assignment (see `response_schema`).
    _response_schema: LLMResponseSchema = PrivateAttr()

    @property
    def response_schema(self) -> LLMResponseSchema:
        """Response schema derived from the blocks, computed on first access."""
        try:
            return self._response_schema
        except AttributeError:
            # First access: derive from the blocks and cache the result.
            self._response_schema = get_response_schema(self.blocks)
            return self._response_schema

    def list_output_columns(self) -> List[str]:
        raise NotImplementedError()

    def get_type(self, subcolumn: Optional[str]) -> ColumnType:
        raise NotImplementedError()

    def get_main_output_column(self) -> str:
        raise NotImplementedError()

    def get_blocks(self) -> Sequence[PromptBlockV1]:
        # NOTE(review): returns v2 blocks where the v1 signature declares
        # PromptBlockV1 — relies on duck-typing compatibility; confirm.
        return self.blocks

    def get_output_format(self) -> OutputFormatBlock:
        rs = self.response_schema
        if isinstance(rs, JsonLLMResponseSchema):
            # Fields left empty: the JSON structure is already spelled out by
            # the blocks themselves — TODO confirm callers only check the type.
            return JsonOutputFormatBlock(fields={})
        raise NotImplementedError()


class TemplatePromptContent(PromptContent):
    """Prompt content assembled from an ordered list of prompt blocks."""

    blocks: List[PromptBlock]

    def as_string(self) -> str:
        rendered = [block.render() for block in self.blocks]
        return "\n".join(rendered)

    def as_template(self) -> PromptTemplate:
        return GenericLLMPromptTemplate(blocks=self.blocks)

    def list_placeholders(self) -> List[Placeholder]:
        placeholders: List[Placeholder] = []
        for block in self.blocks:
            placeholders.extend(block.list_placeholders())
        return placeholders

    def output_schema(self) -> Optional[OutputSchema]:
        raise NotImplementedError


class ChatPromptContent(PromptContent):
    """Prompt content given as a chat transcript of (role, content) pairs."""

    messages: List[Tuple[str, TemplatePromptContent]]

    def as_string(self) -> str:
        parts = [f"{role}: {content.as_string()}" for role, content in self.messages]
        return "\n\n".join(parts)

    def as_template(self) -> PromptTemplate:
        raise NotImplementedError

    def list_placeholders(self) -> List[Placeholder]:
        placeholders: List[Placeholder] = []
        for _, content in self.messages:
            placeholders.extend(content.list_placeholders())
        return placeholders

    def output_schema(self) -> Optional[OutputSchema]:
        # The schema comes from the final message (the one the LLM answers).
        if not self.messages:
            return None
        return self.messages[-1][1].output_schema()
79 changes: 79 additions & 0 deletions src/evidently/llm/prompts/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import uuid
from abc import abstractmethod
from datetime import datetime
from enum import Enum
from typing import Any
from typing import ClassVar
from typing import Dict
from typing import List
from typing import Optional

from evidently._pydantic_compat import BaseModel
from evidently._pydantic_compat import Field
from evidently.legacy.utils.llm.prompts import PromptTemplate
from evidently.pydantic_utils import AutoAliasMixin
from evidently.pydantic_utils import EvidentlyBaseModel
from evidently.ui.service.type_aliases import ZERO_UUID
from evidently.ui.service.type_aliases import ProjectID
from evidently.ui.service.type_aliases import UserID

PromptID = uuid.UUID
PromptVersionID = uuid.UUID

OutputSchema = Dict[str, Any]


class PromptContentType(Enum):
    # Identifies how a prompt's content is represented.
    # NOTE(review): not referenced elsewhere in this diff — presumably used by
    # the API/storage layer to tag PromptContent payloads; confirm.
    CHAT = "chat"
    TEMPLATE = "template"
    STRING = "string"


class PromptContent(AutoAliasMixin, EvidentlyBaseModel):
    """Base class for the different representations of a prompt's content."""

    __alias_type__: ClassVar = "prompt_content"

    class Config:
        is_base_type = True

    @abstractmethod
    def as_string(self) -> str:
        """Render the content as a single prompt string."""
        raise NotImplementedError

    @abstractmethod
    def as_template(self) -> PromptTemplate:
        """Convert the content to a legacy ``PromptTemplate``."""
        raise NotImplementedError

    @abstractmethod
    def list_placeholders(self) -> List[str]:
        """Return the placeholder names present in the content."""
        raise NotImplementedError

    @abstractmethod
    def output_schema(self) -> Optional[OutputSchema]:
        """Schema of the expected LLM output, or None when unspecified."""
        raise NotImplementedError


class PromptMetadata(BaseModel):
    """Audit metadata attached to a prompt."""

    # default_factory gives each instance fresh timestamps at creation time;
    # datetime.now() is naive local time.
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)
    # User who authored the prompt; None when unknown.
    author: Optional[UserID] = None


class Prompt(BaseModel):
    """A named prompt registered within a project."""

    id: PromptID = ZERO_UUID
    project_id: ProjectID = ZERO_UUID
    name: str
    # Fix (idiom): default_factory instead of a shared `PromptMetadata()`
    # instance — the explicit factory is the recommended pydantic form for
    # mutable defaults and avoids building a template object at import time.
    metadata: PromptMetadata = Field(default_factory=PromptMetadata)


class PromptVersionMetadata(PromptMetadata):
    """Audit metadata attached to a prompt version.

    Field-for-field identical to ``PromptMetadata``; defined as a subclass to
    avoid duplicating the field definitions while keeping a distinct type so
    the two can diverge independently later.
    """


class PromptVersion(BaseModel):
    """A numbered snapshot of a prompt's content."""

    id: PromptVersionID = ZERO_UUID
    # The Prompt this version belongs to.
    prompt_id: PromptID = ZERO_UUID
    # Required version number within the prompt (ordering not enforced here).
    version: int
    # Fix (idiom): default_factory instead of a shared mutable default
    # instance, matching pydantic convention.
    metadata: PromptVersionMetadata = Field(default_factory=PromptVersionMetadata)
    content: PromptContent
Loading