From 57a40de2315cd0ea9beca21b7124a54a46e747da Mon Sep 17 00:00:00 2001
From: Joschka Braun
Date: Mon, 4 Dec 2023 11:43:00 -0500
Subject: [PATCH] feat: add eval functions to readme

---
 README.md                                          | 35 ++++++++++++++++++-
 .../tracing_and_evaluating_openai_endpoint.py      |  2 +-
 pyproject.toml                                     |  2 +-
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f3d4aa57..161e00ea 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# parea-sdk
+# Parea Python SDK
@@ -26,6 +26,39 @@ or install with `Poetry`
 poetry add parea-ai
 ```
 
+## Evaluating Your LLM App
+
+You can evaluate any step of your LLM app by wrapping it with the `trace` decorator and specifying the evaluation function(s).
+The scores associated with the traces will be logged to the Parea [dashboard](https://app.parea.ai/logs) and/or to a local CSV file if you don't have a Parea API key.
+
+Evaluation functions receive an argument `log` (of type [Log](parea/schemas/models.py)) and should return a
+float between 0 (bad) and 1 (good), inclusive. You don't need to start from scratch; there are pre-defined evaluation functions for [general purpose](parea/evals/general.py),
+[chat](parea/evals/chat.py), [RAG](parea/evals/rag.py), and [summarization](parea/evals/summary.py) apps :)
+
+You can define evaluation functions locally or use the ones you have deployed to Parea's [Test Hub](https://app.parea.ai/test-hub).
+If you choose the latter option, the evaluation happens asynchronously and is non-blocking.
+
+A cookbook that runs fully locally can be found [here](parea/cookbook/tracing_and_evaluating_openai_endpoint.py).
+Alternatively, you can add the following code to your codebase to get started:
+
+```python
+import os
+from parea import init, InMemoryCache
+from parea.schemas.models import Log
+from parea.utils.trace_utils import trace
+
+init(api_key=os.getenv("PAREA_API_KEY"), cache=InMemoryCache())  # use InMemoryCache if you don't have a Parea API key
+
+
+def locally_defined_eval_function(log: Log) -> float:
+    ...
+
+
+@trace(eval_func_names=['deployed_eval_function_name'], eval_funcs=[locally_defined_eval_function])
+def function_to_evaluate(*args, **kwargs) -> ...:
+    ...
+```
+
 ## Debugging Chains & Agents
 
 You can iterate on your chains & agents much faster by using a local cache. This will allow you to make changes to your
diff --git a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
index f9cd3377..831085f7 100644
--- a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
+++ b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
@@ -20,7 +20,7 @@
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-use_cache = True
+use_cache = True  # by using the in-memory cache, you don't need a Parea API key
 cache = InMemoryCache() if use_cache else None
 init(api_key=os.getenv("PAREA_API_KEY"), cache=cache)
 
diff --git a/pyproject.toml b/pyproject.toml
index 1f9f1f3d..799f4e8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "parea-ai"
 packages = [{ include = "parea" }]
-version = "0.2.20"
+version = "0.2.21"
 description = "Parea python sdk"
 readme = "README.md"
 authors = ["joel-parea-ai "]
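As a supplement to the README snippet this patch adds, here is a minimal sketch of what the `...` placeholders could be filled in with. The `keyword_coverage_eval` function and the `answer_question` step are hypothetical illustrations, not part of the SDK, and the sketch assumes `Log` exposes optional `output` and `target` string fields as defined in `parea/schemas/models.py`:

```python
import os

from parea import InMemoryCache, init
from parea.schemas.models import Log
from parea.utils.trace_utils import trace

# With the in-memory cache, no Parea API key is needed; scores go to a local CSV.
init(api_key=os.getenv("PAREA_API_KEY"), cache=InMemoryCache())


def keyword_coverage_eval(log: Log) -> float:
    # Hypothetical eval: fraction of target keywords that appear in the output,
    # returned as a float between 0 (bad) and 1 (good) as the README requires.
    # Assumes `Log` exposes optional `output` and `target` string fields.
    if not log.output or not log.target:
        return 0.0
    keywords = log.target.lower().split()
    if not keywords:
        return 0.0
    hits = sum(1 for kw in keywords if kw in log.output.lower())
    return hits / len(keywords)


@trace(eval_funcs=[keyword_coverage_eval])  # scores attach to this step's trace
def answer_question(question: str) -> str:
    # Hypothetical app step; a real app would call an LLM here.
    return "Berlin is the capital of Germany."
```

Per the README text above, calling `answer_question(...)` would produce a trace whose `Log` is passed to each eval function after the step returns, and the resulting scores are logged to the Parea dashboard or, without an API key, to the local CSV file.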