From b988bff604a66f7cc18996d6fc5f9b700748d7f4 Mon Sep 17 00:00:00 2001
From: Joel Alexander
Date: Wed, 5 Jun 2024 12:59:38 -0400
Subject: [PATCH 1/2] feat(client): get scores endpoint

---
 parea/client.py                               |  42 ++++++-
 .../dynamic_few_shot_injection_with_evals.py  | 106 ++++++++++++++++++
 parea/helpers.py                              |   8 +-
 parea/schemas/models.py                       |   2 +-
 4 files changed, 149 insertions(+), 9 deletions(-)
 create mode 100644 parea/cookbook/openai/dynamic_few_shot_injection_with_evals.py

diff --git a/parea/client.py b/parea/client.py
index 1387b221..fa8f0369 100644
--- a/parea/client.py
+++ b/parea/client.py
@@ -16,6 +16,7 @@
 from parea.experiment.datasets import create_test_cases, create_test_collection
 from parea.helpers import gen_trace_id, serialize_metadata_values, structure_trace_log_from_api, structure_trace_logs_from_api
 from parea.parea_logger import parea_logger
+from parea.schemas import EvaluationResult
 from parea.schemas.models import (
     Completion,
     CompletionResponse,
@@ -33,6 +34,7 @@
     TestCaseCollection,
     TraceLog,
     TraceLogFilters,
+    TraceLogTree,
     UseDeployedPrompt,
     UseDeployedPromptResponse,
 )
@@ -397,11 +399,30 @@ def _update_data_and_trace(self, data: Completion) -> Completion:

         return data

-    def get_trace_log(self, trace_id: str) -> TraceLog:
+    def get_trace_log(self, trace_id: str) -> TraceLogTree:
         response = self._client.request("GET", GET_TRACE_LOG_ENDPOINT.format(trace_id=trace_id))
         return structure_trace_log_from_api(response.json())

-    async def aget_trace_log(self, trace_id: str) -> TraceLog:
+    def get_trace_log_scores(self, trace_id: str, check_context: bool = True) -> List[EvaluationResult]:
+        """
+        Get the scores from the trace log. If the scores are not present in the trace log, fetch them from the DB.
+        Args:
+            trace_id: The trace id to get the scores for.
+            check_context: If True, will check the context for the scores first before fetching from the DB.
+
+        Returns: A list of EvaluationResult objects.
+        """
+        # try to get trace_id scores from context
+        if check_context:
+            if scores := (trace_data.get()[trace_id].scores or []):
+                print("Scores from context", scores)
+                return scores
+
+        response = self._client.request("GET", GET_TRACE_LOG_ENDPOINT.format(trace_id=trace_id))
+        tree: TraceLogTree = structure_trace_log_from_api(response.json())
+        return extract_scores(tree)
+
+    async def aget_trace_log(self, trace_id: str) -> TraceLogTree:
         response = await self._client.request_async("GET", GET_TRACE_LOG_ENDPOINT.format(trace_id=trace_id))
         return structure_trace_log_from_api(response.json())

@@ -413,11 +434,11 @@ async def alist_experiments(self, filter_conditions: Optional[ListExperimentUUID
         response = await self._client.request_async("POST", LIST_EXPERIMENTS_ENDPOINT, data=asdict(filter_conditions))
         return structure(response.json(), List[ExperimentWithPinnedStatsSchema])

-    def get_experiment_trace_logs(self, experiment_uuid: str, filters: TraceLogFilters = TraceLogFilters()) -> List[TraceLog]:
+    def get_experiment_trace_logs(self, experiment_uuid: str, filters: TraceLogFilters = TraceLogFilters()) -> List[TraceLogTree]:
         response = self._client.request("POST", GET_EXPERIMENT_LOGS_ENDPOINT.format(experiment_uuid=experiment_uuid), data=asdict(filters))
         return structure_trace_logs_from_api(response.json())

-    async def aget_experiment_trace_logs(self, experiment_uuid: str, filters: TraceLogFilters = TraceLogFilters()) -> List[TraceLog]:
+    async def aget_experiment_trace_logs(self, experiment_uuid: str, filters: TraceLogFilters = TraceLogFilters()) -> List[TraceLogTree]:
         response = await self._client.request_async("POST", GET_EXPERIMENT_LOGS_ENDPOINT.format(experiment_uuid=experiment_uuid), data=asdict(filters))
         return structure_trace_logs_from_api(response.json())

@@ -446,3 +467,16 @@ def new_init(self, *args, **kwargs):

     subclass = type(openai_client.__name__, (openai_client,), {"__init__": new_init})
     return subclass
+
+
+def extract_scores(tree: TraceLogTree) -> List[EvaluationResult]:
+    scores: List[EvaluationResult] = []
+
+    def traverse(node: TraceLogTree):
+        if node.scores:
+            scores.extend(node.scores or [])
+        for child in node.children_logs:
+            traverse(child)
+
+    traverse(tree)
+    return scores
diff --git a/parea/cookbook/openai/dynamic_few_shot_injection_with_evals.py b/parea/cookbook/openai/dynamic_few_shot_injection_with_evals.py
new file mode 100644
index 00000000..67fc0f91
--- /dev/null
+++ b/parea/cookbook/openai/dynamic_few_shot_injection_with_evals.py
@@ -0,0 +1,106 @@
+from typing import List, Optional
+
+import os
+import random
+
+from dotenv import load_dotenv
+from openai import OpenAI
+from pydantic import BaseModel
+
+from parea import Parea, get_current_trace_id, trace, trace_insert
+from parea.schemas import Log, TestCase
+
+load_dotenv()
+
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+p = Parea(api_key=os.getenv("PAREA_API_KEY"))
+p.wrap_openai_client(client)
+
+NUM_INTERACTIONS = 5
+
+
+class Person(BaseModel):
+    name: str
+    email: str
+
+
+class Email(BaseModel):
+    contact: Person
+    email_sent: str
+
+
+mock_DB: dict[str, Email] = {}
+
+
+def call_llm(messages: List[dict], model: str = "gpt-4o", temperature: float = 0.0) -> str:
+    return client.chat.completions.create(model=model, temperature=temperature, messages=messages).choices[0].message.content
+
+
+def eval_func(log: Log) -> float:
+    return random.uniform(0, 1)
+
+
+# Imitate collecting few shot examples from prod based on user feedback
+@trace(eval_funcs=[eval_func])
+def email_writer(main_objective: str, contact: Person, few_shot_examples: Optional[List[str]] = None) -> str:
+    trace_insert({"end_user_identifier": contact.name, "metadata": {"has_few_shot_examples": bool(few_shot_examples)}})
+
+    few_shot_examples_prompt = ("\nHere are some examples of good emails\n" + "\n".join(few_shot_examples)) if few_shot_examples else ""
+    messages = [
+        {
+            "role": "system",
+            "content": f"You are an AI who performs an email writing task based on the following objective: {main_objective}",
+        },
+        {
+            "role": "user",
+            "content": f"""
+            Your email is from: {contact.model_dump()}
+            {few_shot_examples_prompt if few_shot_examples else ""}
+            Email:
+            """,
+        },
+    ]
+    response = call_llm(messages)
+    trace_id = get_current_trace_id()
+    # insert into mock_DB
+    mock_DB[trace_id] = Email(contact=contact, email_sent=response)
+    return response
+
+
+def mimic_prod(few_shot_limit: int = 3):
+    contact = Person(name="John Doe", email="jdoe@email.com")
+    dataset = p.get_collection("Good_Email_Examples")
+    selected_few_shot_examples = None
+    if dataset:
+        testcases: list[TestCase] = list(dataset.test_cases.values())
+        few_shot_examples = [case.inputs["email"] for case in testcases if case.inputs["user"] == contact.name]
+        # This is simply taking most recent n examples. You can imagine adding additional logic to the dataset
+        # that allows you to rank the examples based on some criteria
+        selected_few_shot_examples = few_shot_examples[-few_shot_limit:] if few_shot_examples else None
+    for interaction in range(NUM_INTERACTIONS):
+        email = email_writer("Convincing email to gym to cancel membership early.", contact, selected_few_shot_examples)
+        print(email)
+
+
+def add_good_email_example_to_dataset(user_name, email):
+    # Note: if the test case collection doesn't exist, we will create a new collection with the provided name and data
+    p.add_test_cases([{"user": user_name, "email": email}], name="Good_Email_Examples")
+
+
+def mimic_prod_checking_eval_scores():
+    # imagine the trace_id of the email is stored in state in the UI, so when the user provides feedback, we can use it
+    trace_ids = mock_DB.keys()
+    for trace_id in trace_ids:
+        scores = p.get_trace_log_scores(trace_id)
+        for score in scores:
+            if score.name == "eval_func" and score.score >= 0.5:
+                add_good_email_example_to_dataset(mock_DB[trace_id].contact.name, mock_DB[trace_id].email_sent)
+                break
+
+
+if __name__ == "__main__":
+    mimic_prod()
+    mimic_prod_checking_eval_scores()
+    # future llm calls will now have few-shot examples from the feedback collection
+    mimic_prod()
+    print("Done")
diff --git a/parea/helpers.py b/parea/helpers.py
index b13e8232..bbf27f4e 100644
--- a/parea/helpers.py
+++ b/parea/helpers.py
@@ -11,7 +11,7 @@
 from cattrs import GenConverter

 from parea.constants import ADJECTIVES, NOUNS, TURN_OFF_PAREA_LOGGING
-from parea.schemas.models import Completion, TraceLog, UpdateLog
+from parea.schemas.models import Completion, TraceLog, TraceLogTree, UpdateLog
 from parea.utils.universal_encoder import json_dumps

@@ -81,7 +81,7 @@ def timezone_aware_now() -> datetime:
     return datetime.now(pytz.utc)


-def structure_trace_log_from_api(d: dict) -> TraceLog:
+def structure_trace_log_from_api(d: dict) -> TraceLogTree:
     def structure_union_type(obj: Any, cl: type) -> Any:
         if isinstance(obj, str):
             return obj
@@ -92,10 +92,10 @@ def structure_union_type(obj: Any, cl: type) -> Any:

     converter = GenConverter()
     converter.register_structure_hook(Union[str, Dict[str, str], None], structure_union_type)
-    return converter.structure(d, TraceLog)
+    return converter.structure(d, TraceLogTree)


-def structure_trace_logs_from_api(data: List[dict]) -> List[TraceLog]:
+def structure_trace_logs_from_api(data: List[dict]) -> List[TraceLogTree]:
     return [structure_trace_log_from_api(d) for d in data]

diff --git a/parea/schemas/models.py b/parea/schemas/models.py
index f6cdd42c..6dec7001 100644
--- a/parea/schemas/models.py
+++ b/parea/schemas/models.py
@@ -147,7 +147,7 @@ class TraceLog(EvaluatedLog):

 @define
 class TraceLogTree(TraceLog):
-    children: Optional[List[TraceLog]] = field(factory=list)
+    children_logs: Optional[List[TraceLog]] = field(factory=list)


 @define

From 2e35f911655508329132ec9e5e0556afc3f1d929 Mon Sep 17 00:00:00 2001
From: Joel Alexander
Date: Wed, 5 Jun 2024 12:59:52 -0400
Subject: [PATCH 2/2] feat(client): get scores endpoint

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1f7f2aef..aef82259 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "parea-ai"
 packages = [{ include = "parea" }]
-version = "0.2.167a0"
+version = "0.2.167"
 description = "Parea python sdk"
 readme = "README.md"
 authors = ["joel-parea-ai "]