From f6801bf2eca6a3234d3e0381d981e49e533f5fd0 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 11:08:00 +0200
Subject: [PATCH 01/13] feat: attach evaluation function via trace decorator

---
 parea/schemas/models.py    |  9 +++++++-
 parea/utils/trace_utils.py | 43 +++++++++++++++++++++++++++++++++-----
 2 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/parea/schemas/models.py b/parea/schemas/models.py
index 089ace63..d91001d0 100644
--- a/parea/schemas/models.py
+++ b/parea/schemas/models.py
@@ -109,6 +109,12 @@ class FeedbackRequest:
     target: Optional[str] = None
 
 
+@define
+class NamedEvaluationScore:
+    name: str
+    score: float = field(validator=[validators.ge(0), validators.le(1)])
+
+
 @define
 class TraceLog:
     trace_id: str
@@ -119,7 +125,6 @@ class TraceLog:
     error: Optional[str] = None
     status: Optional[str] = None
     deployment_id: Optional[str] = None
-    evaluation_metric_ids: Optional[list[int]] = None
     cache_hit: bool = False
     configuration: LLMInputs = LLMInputs()
     latency: Optional[float] = 0.0
@@ -127,6 +132,8 @@ class TraceLog:
     output_tokens: Optional[int] = 0
     total_tokens: Optional[int] = 0
     cost: Optional[float] = 0.0
+    evaluation_metric_ids: Optional[list[int]] = None
+    named_evaluation_scores: Optional[list[NamedEvaluationScore]] = None
     feedback_score: Optional[float] = None
 
     # info filled from decorator
diff --git a/parea/utils/trace_utils.py b/parea/utils/trace_utils.py
index d44f89b4..b15de456 100644
--- a/parea/utils/trace_utils.py
+++ b/parea/utils/trace_utils.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional, Union
+from typing import Any, Optional, Union, Callable
 
 import contextvars
 import inspect
@@ -13,7 +13,7 @@
 
 from parea.helpers import gen_trace_id, to_date_and_time_string
 from parea.parea_logger import parea_logger
-from parea.schemas.models import CompletionResponse, TraceLog
+from parea.schemas.models import CompletionResponse, TraceLog, NamedEvaluationScore
 
 logger = logging.getLogger()
 
@@ -55,6 +55,8 @@ def trace(
     metadata: Optional[dict[str, Any]] = None,
     target: Optional[str] = None,
     end_user_identifier: Optional[str] = None,
+    eval_funcs: Optional[list[Callable]] = None,
+    access_output_of_func: Optional[Callable] = None,
 ):
     def init_trace(func_name, args, kwargs, func) -> tuple[str, float]:
         start_time = time.time()
@@ -87,7 +89,7 @@ def cleanup_trace(trace_id, start_time):
         end_time = time.time()
         trace_data.get()[trace_id].end_timestamp = to_date_and_time_string(end_time)
         trace_data.get()[trace_id].latency = end_time - start_time
-        logger_all_possible(trace_id)
+        thread_eval_funcs_then_log(trace_id, eval_funcs, access_output_of_func)
         trace_context.get().pop()
 
     def decorator(func):
@@ -98,7 +100,8 @@ async def async_wrapper(*args, **kwargs):
             try:
                 result = await func(*args, **kwargs)
                 output = make_output(result, output_as_list)
-                trace_data.get()[trace_id].output = json.dumps(output)
+                trace_data.get()[trace_id].output = output if isinstance(output, str) else json.dumps(output)
+                trace_data.get()[trace_id].status = "success"
             except Exception as e:
                 logger.exception(f"Error occurred in function {func.__name__}, {e}")
                 trace_data.get()[trace_id].error = str(e)
@@ -115,7 +118,8 @@ def wrapper(*args, **kwargs):
             try:
                 result = func(*args, **kwargs)
                 output = make_output(result, output_as_list)
-                trace_data.get()[trace_id].output = json.dumps(output)
+                trace_data.get()[trace_id].output = output if isinstance(output, str) else json.dumps(output)
+                trace_data.get()[trace_id].status = "success"
             except Exception as e:
                 logger.exception(f"Error occurred in function {func.__name__}, {e}")
                 trace_data.get()[trace_id].error = str(e)
@@ -169,3 +173,32 @@ def logger_all_possible(trace_id: str):
         kwargs={"data": trace_data.get()[trace_id]},
     )
     logging_thread.start()
+
+
+def call_eval_funcs_then_log(trace_id: str, eval_funcs: list[Callable] = None, access_output_of_func: Callable = None):
+    data = trace_data.get()[trace_id]
+    try:
+        inputs = data.inputs
+        output = data.output
+        if access_output_of_func:
+            output = access_output_of_func(output)
+        target = data.target
+        if eval_funcs and data.status == "success":
+            data.named_evaluation_scores = []
+            for func in eval_funcs:
+                try:
+                    score = func(inputs=inputs, output=output, target=target)
+                    data.named_evaluation_scores.append(NamedEvaluationScore(name=func.__name__, score=score))
+                except Exception as e:
+                    logger.exception(f"Error occurred calling evaluation function '{func.__name__}', {e}", exc_info=e)
+    except Exception as e:
+        logger.exception(f"Error occurred in when trying to evaluate output, {e}", exc_info=e)
+    parea_logger.default_log(data=data)
+
+
+def thread_eval_funcs_then_log(trace_id: str, eval_funcs: list[Callable] = None, access_output_of_func: Callable = None):
+    logging_thread = threading.Thread(
+        target=call_eval_funcs_then_log,
+        kwargs={"trace_id": trace_id, "eval_funcs": eval_funcs, "access_output_of_func": access_output_of_func},
+    )
+    logging_thread.start()

From 4f3d78f536c8276a9693373ad02f005940f27651 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 11:08:31 +0200
Subject: [PATCH 02/13] refactor: remove lru naming from redis cache

---
 parea/benchmark.py    |  6 +++---
 parea/client.py       |  2 +-
 parea/parea_logger.py | 10 +++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/parea/benchmark.py b/parea/benchmark.py
index a236b1cc..a736bd5f 100644
--- a/parea/benchmark.py
+++ b/parea/benchmark.py
@@ -50,8 +50,8 @@ def async_wrapper(fn, **kwargs):
 
 def run_benchmark(args):
     parser = argparse.ArgumentParser()
-    parser.add_argument("--func", help="Function to test e.g., path/to/my_code.py:argument_chain", type=str)
-    parser.add_argument("--csv_path", help="Path to the input CSV file", type=str)
+    parser.add_argument("--func", help="Function to test e.g., path/to/my_code.py:argument_chain", type=str, required=True)
+    parser.add_argument("--csv_path", help="Path to the input CSV file", type=str, required=True)
     parser.add_argument("--redis_host", help="Redis host", type=str, default=os.getenv("REDIS_HOST", "localhost"))
     parser.add_argument("--redis_port", help="Redis port", type=int, default=int(os.getenv("REDIS_PORT", 6379)))
     parser.add_argument("--redis_password", help="Redis password", type=str, default=None)
@@ -69,7 +69,7 @@ def run_benchmark(args):
             futures = [executor.submit(async_wrapper, fn, **data_input) for data_input in data_inputs]
         else:
             futures = [executor.submit(fn, **data_input) for data_input in data_inputs]
-        for f in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
+        for _ in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
             pass
         print(f"Done with {len(futures)} inputs")
 
diff --git a/parea/client.py b/parea/client.py
index dc8b1d4c..f0043dfb 100644
--- a/parea/client.py
+++ b/parea/client.py
@@ -31,7 +31,7 @@ def __attrs_post_init__(self):
         if self.api_key:
             parea_logger.set_client(self._client)
         if isinstance(self.cache, RedisCache):
-            parea_logger.set_redis_lru_cache(self.cache)
+            parea_logger.set_redis_cache(self.cache)
         _init_parea_wrapper(logger_all_possible, self.cache)
 
     def completion(self, data: Completion) -> CompletionResponse:
diff --git a/parea/parea_logger.py b/parea/parea_logger.py
index 796f4999..67f362b0 100644
--- a/parea/parea_logger.py
+++ b/parea/parea_logger.py
@@ -10,13 +10,13 @@
 @define
 class PareaLogger:
     _client: HTTPClient = field(init=False, default=None)
-    _redis_lru_cache: RedisCache = field(init=False, default=None)
+    _redis_cache: RedisCache = field(init=False, default=None)
 
     def set_client(self, client: HTTPClient) -> None:
         self._client = client
 
-    def set_redis_lru_cache(self, cache: RedisCache) -> None:
-        self._redis_lru_cache = cache
+    def set_redis_cache(self, cache: RedisCache) -> None:
+        self._redis_cache = cache
 
     def record_log(self, data: TraceLog) -> None:
         self._client.request(
@@ -33,10 +33,10 @@ async def arecord_log(self, data: TraceLog) -> None:
         )
 
     def write_log(self, data: TraceLog) -> None:
-        self._redis_lru_cache.log(data)
+        self._redis_cache.log(data)
 
     def default_log(self, data: TraceLog) -> None:
-        if self._redis_lru_cache:
+        if self._redis_cache:
             self.write_log(data)
         if self._client:
             self.record_log(data)

From d70807d1556589e58eadc7b8b7663a000a58ca02 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 13:39:44 +0200
Subject: [PATCH 03/13] docs: update cookbook

---
 parea/cookbook/tracing_with_open_ai_endpoint_directly.py | 9 ++++++++-
 parea/schemas/models.py                                  | 2 +-
 parea/utils/trace_utils.py                               | 4 ++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/parea/cookbook/tracing_with_open_ai_endpoint_directly.py b/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
index 90002ad4..7b711321 100644
--- a/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
+++ b/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
@@ -1,5 +1,7 @@
 import os
+import random
 from datetime import datetime
+from typing import Dict, Optional
 
 import openai
 from dotenv import load_dotenv
@@ -19,7 +21,12 @@ def call_llm(data: list[dict], model: str = "gpt-3.5-turbo", temperature: float
     return openai.ChatCompletion.create(model=model, temperature=temperature, messages=data).choices[0].message["content"]
 
 
-@trace
+def random_eval(inputs: Dict[str, str], output, target: Optional[str] = None) -> float:
+    # return random number between 0 and 1
+    return random.random()
+
+
+@trace(eval_funcs=[random_eval])
 def argumentor(query: str, additional_description: str = "") -> str:
     return call_llm(
         [
diff --git a/parea/schemas/models.py b/parea/schemas/models.py
index d91001d0..71f2b962 100644
--- a/parea/schemas/models.py
+++ b/parea/schemas/models.py
@@ -133,7 +133,7 @@ class TraceLog:
     total_tokens: Optional[int] = 0
     cost: Optional[float] = 0.0
     evaluation_metric_ids: Optional[list[int]] = None
-    named_evaluation_scores: Optional[list[NamedEvaluationScore]] = None
+    scores: Optional[list[NamedEvaluationScore]] = None
     feedback_score: Optional[float] = None
 
     # info filled from decorator
diff --git a/parea/utils/trace_utils.py b/parea/utils/trace_utils.py
index b15de456..c4108107 100644
--- a/parea/utils/trace_utils.py
+++ b/parea/utils/trace_utils.py
@@ -184,11 +184,11 @@ def call_eval_funcs_then_log(trace_id: str, eval_funcs: list[Callable] = None, a
             output = access_output_of_func(output)
         target = data.target
         if eval_funcs and data.status == "success":
-            data.named_evaluation_scores = []
+            data.scores = []
             for func in eval_funcs:
                 try:
                     score = func(inputs=inputs, output=output, target=target)
-                    data.named_evaluation_scores.append(NamedEvaluationScore(name=func.__name__, score=score))
+                    data.scores.append(NamedEvaluationScore(name=func.__name__, score=score))
                 except Exception as e:
                     logger.exception(f"Error occurred calling evaluation function '{func.__name__}', {e}", exc_info=e)
     except Exception as e:

From 1bc6bfbd83a8fe7691f035bd352d93a00e998c48 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 13:39:55 +0200
Subject: [PATCH 04/13] docs: update cookbook

---
 parea/cookbook/tracing_with_open_ai_endpoint_directly.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parea/cookbook/tracing_with_open_ai_endpoint_directly.py b/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
index 7b711321..b245c1e7 100644
--- a/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
+++ b/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
@@ -55,7 +55,7 @@ def critic(argument: str) -> str:
     )
 
 
-@trace
+@trace(eval_funcs=[random_eval])
 def refiner(query: str, additional_description: str, argument: str, criticism: str) -> str:
     return call_llm(
         [
@@ -75,7 +75,7 @@ def refiner(query: str, additional_description: str, argument: str, criticism: s
     )
 
 
-@trace
+@trace(eval_funcs=[random_eval], access_output_of_func=lambda x: x[0])
 def argument_chain(query: str, additional_description: str = "") -> tuple[str, str]:
     trace_id = get_current_trace_id()
     argument = argumentor(query, additional_description)

From b2a09632be6dde09727da72be313c4354da92f34 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 18:47:28 +0200
Subject: [PATCH 05/13] feat: do not trace local eval metrics

---
 parea/wrapper/utils.py   | 21 +++++++++++++++++++++
 parea/wrapper/wrapper.py |  5 ++++-
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 parea/wrapper/utils.py

diff --git a/parea/wrapper/utils.py b/parea/wrapper/utils.py
new file mode 100644
index 00000000..c90e2940
--- /dev/null
+++ b/parea/wrapper/utils.py
@@ -0,0 +1,21 @@
+from typing import Callable
+import inspect
+from functools import wraps
+
+
+def skip_decorator_if_func_in_stack(func_to_check: Callable) -> Callable:
+    def decorator_wrapper(decorator: Callable) -> Callable:
+
+        def new_decorator(self, func: Callable) -> Callable:  # Include self
+
+            @wraps(func)
+            def wrapper(*args, **kwargs):
+                if any(func_to_check.__name__ in frame.function for frame in inspect.stack()):
+                    return func(*args, **kwargs)
+                return decorator(self, func)(*args, **kwargs)  # Include self
+
+            return wrapper
+
+        return new_decorator
+
+    return decorator_wrapper
diff --git a/parea/wrapper/wrapper.py b/parea/wrapper/wrapper.py
index 35f60fb0..bc1f35a9 100644
--- a/parea/wrapper/wrapper.py
+++ b/parea/wrapper/wrapper.py
@@ -8,7 +8,8 @@
 from parea.cache.cache import Cache
 from parea.helpers import date_and_time_string_to_timestamp
 from parea.schemas.models import TraceLog
-from parea.utils.trace_utils import to_date_and_time_string, trace_context, trace_data
+from parea.utils.trace_utils import to_date_and_time_string, trace_context, trace_data, call_eval_funcs_then_log
+from parea.wrapper.utils import skip_decorator_if_func_in_stack
 
 
 class Wrapper:
@@ -88,6 +89,7 @@ def _init_trace(self) -> Tuple[str, float]:
 
         return trace_id, start_time
 
+    @skip_decorator_if_func_in_stack(call_eval_funcs_then_log)
     def async_decorator(self, orig_func: Callable) -> Callable:
         async def wrapper(*args, **kwargs):
             trace_id, start_time = self._init_trace()
@@ -113,6 +115,7 @@ async def wrapper(*args, **kwargs):
 
         return wrapper
 
+    @skip_decorator_if_func_in_stack(call_eval_funcs_then_log)
     def sync_decorator(self, orig_func: Callable) -> Callable:
         def wrapper(*args, **kwargs):
             trace_id, start_time = self._init_trace()

From bcd4e567bfb97987c10d594bd441c6030b03cfb7 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 18:47:54 +0200
Subject: [PATCH 06/13] feat: add unhelpful chat example

---
 .../tracing_and_evaluating_openai_endpoint.py | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 parea/cookbook/tracing_and_evaluating_openai_endpoint.py

diff --git a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
new file mode 100644
index 00000000..9b92c206
--- /dev/null
+++ b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
@@ -0,0 +1,119 @@
+import os
+from typing import Dict, List
+
+import openai
+from dotenv import load_dotenv
+
+from parea import init
+from parea.utils.trace_utils import trace
+
+load_dotenv()
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+init(api_key=os.getenv("PAREA_API_KEY"))
+
+
+def call_llm(data: list[dict], model: str = "gpt-3.5-turbo", temperature: float = 0.0) -> str:
+    return openai.ChatCompletion.create(model=model, temperature=temperature, messages=data).choices[0].message["content"]
+
+
+def friendliness(inputs: Dict, output: str, target: str = None) -> float:
+    response = call_llm(
+        [
+            {
+                "role": "system",
+                "content": "You evaluate the friendliness of the following response on a scale of 0 to 10. You must only return a number."
+            },
+            {"role": "assistant", "content": output},
+        ],
+        model='gpt-4'
+    )
+    try:
+        return float(response) / 10.0
+    except TypeError:
+        return 0.0
+
+
+def usefulness(inputs: Dict, output: str, target: str = None) -> float:
+    user_input = inputs['messages'][-1]["content"]
+    response = call_llm(
+        [
+            {
+                "role": "system",
+                "content": "You evaluate the usefulness of the response given the user input on a scale of 0 to 10. You must only return a number."
+            },
+            {"role": "assistant", "content": f'''User input: "{user_input}"\nAssistant response: "{output}"'''}
+        ],
+        model='gpt-4'
+    )
+    try:
+        return float(response) / 10.0
+    except TypeError:
+        return 0.0
+
+
+@trace(eval_funcs=[friendliness, usefulness])
+def helpful_the_second_time(messages: List[Dict[str, str]]) -> str:
+    helpful_response = call_llm(
+        [
+            {
+                "role": "system",
+                "content": "You are a friendly, and helpful assistant that helps people with their homework."
+            },
+
+        ] + messages,
+        model='gpt-4'
+    )
+
+    has_user_asked_before_raw = call_llm(
+        [
+            {
+                "role": "system",
+                "content": "Assess if the user has asked the last question before or is asking again for more \
+information on a previous topic. If so, respond ASKED_BEFORE. Otherwise, respond NOT_ASKED_BEFORE."
+            }
+        ] + messages,
+        model='gpt-4'
+    )
+    has_user_asked_before = has_user_asked_before_raw == "ASKED_BEFORE"
+
+    if has_user_asked_before:
+        messages.append({"role": "assistant", "content": helpful_response})
+        return helpful_response
+    else:
+        unhelfpul_response = call_llm(
+            [
+                {
+                    "role": "system",
+                    "content": "Given the helpful response to the user input below, please provide a slightly unhelpful \
+    response which makes the user ask again in case they didn't ask already again because of a previous unhelpful answer. \
+    In case the user asked again, please provide a last response"
+                },
+            ] + messages + [{"role": "assistant", "content": helpful_response}],
+            model='gpt-4'
+        )
+        messages.append({"role": "assistant", "content": unhelfpul_response})
+        return unhelfpul_response
+
+
+@trace
+def unhelpful_chat():
+    print("Welcome to the chat! Type 'exit' to end the session.")
+
+    messages = []
+    while True:
+        user_input = input("\nYou: ")
+
+        if user_input.lower() == 'exit':
+            print("Goodbye!")
+            break
+
+        messages.append({"role": "user", "content": user_input})
+        print("Bot:", helpful_the_second_time(messages))
+
+    return messages
+
+
+if __name__ == "__main__":
+    unhelpful_chat()

From 79665402761852cdb04fd093521a6799c4f41c18 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 19:17:12 +0200
Subject: [PATCH 07/13] feat: allow export to CSV

---
 parea/benchmark.py                            | 18 ++++-------
 .../tracing_and_evaluating_openai_endpoint.py | 31 ++++++++++++++++---
 parea/helpers.py                              | 17 ++++++++++
 3 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/parea/benchmark.py b/parea/benchmark.py
index a736bd5f..b90e4722 100644
--- a/parea/benchmark.py
+++ b/parea/benchmark.py
@@ -13,6 +13,7 @@
 from tqdm import tqdm
 
 from parea.cache.redis import RedisCache
+from parea.helpers import write_trace_logs_to_csv
 from parea.schemas.models import TraceLog
 
 
@@ -73,19 +74,12 @@ def run_benchmark(args):
             pass
         print(f"Done with {len(futures)} inputs")
 
-        redis_cache = RedisCache(key_logs=redis_logs_key)
-
-        trace_logs: list[TraceLog] = redis_cache.read_logs()
+        redis_cache = RedisCache(
+            key_logs=redis_logs_key, host=args.redis_host, port=args.redis_port, password=args.redis_password
+        )
 
         # write to csv
         path_csv = f"trace_logs-{int(time.time())}.csv"
-        with open(path_csv, "w", newline="") as file:
-            # write header
-            columns = fields_dict(TraceLog).keys()
-            writer = csv.DictWriter(file, fieldnames=columns)
-            writer.writeheader()
-            # write rows
-            for trace_log in trace_logs:
-                writer.writerow(asdict(trace_log))
-
+        trace_logs: list[TraceLog] = redis_cache.read_logs()
+        write_trace_logs_to_csv(path_csv, trace_logs)
         print(f"Wrote CSV of results to: {path_csv}")
diff --git a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
index 9b92c206..9660be8f 100644
--- a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
+++ b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
@@ -1,17 +1,22 @@
 import os
+import time
 from typing import Dict, List
 
 import openai
 from dotenv import load_dotenv
 
-from parea import init
-from parea.utils.trace_utils import trace
+from parea import init, RedisCache
+from parea.helpers import write_trace_logs_to_csv
+from parea.utils.trace_utils import trace, get_current_trace_id
 
 load_dotenv()
 
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-init(api_key=os.getenv("PAREA_API_KEY"))
+
+use_cache = True
+cache = RedisCache() if use_cache else None
+init(api_key=os.getenv("PAREA_API_KEY"), cache=cache)
 
 
 def call_llm(data: list[dict], model: str = "gpt-3.5-turbo", temperature: float = 0.0) -> str:
@@ -101,6 +106,8 @@ def helpful_the_second_time(messages: List[Dict[str, str]]) -> str:
 def unhelpful_chat():
     print("Welcome to the chat! Type 'exit' to end the session.")
 
+    trace_id = get_current_trace_id()
+
     messages = []
     while True:
         user_input = input("\nYou: ")
@@ -112,8 +119,22 @@ def unhelpful_chat():
         messages.append({"role": "user", "content": user_input})
         print("Bot:", helpful_the_second_time(messages))
 
-    return messages
+    return messages, trace_id
+
+
+def main():
+    _ , trace_id = unhelpful_chat()
+
+    time.sleep(0.2)
+
+    if use_cache:
+        path_csv = f"trace_logs-{int(time.time())}.csv"
+        trace_logs = cache.read_logs()
+        write_trace_logs_to_csv(path_csv, trace_logs)
+        print(f"CSV-file of results: {path_csv}")
+    if os.getenv("PAREA_API_KEY"):
+        print(f'You can view the logs at: https://optimusprompt.ai/logs/detailed/{trace_id}')
 
 
 if __name__ == "__main__":
-    unhelpful_chat()
+    main()
diff --git a/parea/helpers.py b/parea/helpers.py
index 67eac45b..b4391d61 100644
--- a/parea/helpers.py
+++ b/parea/helpers.py
@@ -1,5 +1,11 @@
+import csv
 import time
 import uuid
+from typing import List
+
+from attr import fields_dict, asdict
+
+from parea.schemas.models import TraceLog
 
 
 def gen_trace_id() -> str:
@@ -13,3 +19,14 @@ def to_date_and_time_string(timestamp: float) -> str:
 
 def date_and_time_string_to_timestamp(date_and_time_string: str) -> float:
     return time.mktime(time.strptime(date_and_time_string, "%Y-%m-%d %H:%M:%S %Z"))
+
+
+def write_trace_logs_to_csv(path_csv: str, trace_logs: List[TraceLog]):
+    with open(path_csv, "w", newline="") as file:
+        # write header
+        columns = fields_dict(TraceLog).keys()
+        writer = csv.DictWriter(file, fieldnames=columns)
+        writer.writeheader()
+        # write rows
+        for trace_log in trace_logs:
+            writer.writerow(asdict(trace_log))

From 2257df657176362545d5c85b374a0a111457c3aa Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 23:20:27 +0200
Subject: [PATCH 08/13] feat: add goal success ratio

---
 .../tracing_and_evaluating_openai_endpoint.py | 45 ++++++++++++++++---
 parea/utils/trace_utils.py                    |  6 ++-
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
index 9660be8f..1bd7a2b0 100644
--- a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
+++ b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
@@ -1,3 +1,4 @@
+import json
 import os
 import time
 from typing import Dict, List
@@ -23,6 +24,41 @@ def call_llm(data: list[dict], model: str = "gpt-3.5-turbo", temperature: float
     return openai.ChatCompletion.create(model=model, temperature=temperature, messages=data).choices[0].message["content"]
 
 
+def goal_success_ratio(inputs: Dict, output: str, target: str = None) -> float:
+    """Returns the average amount of turns the user had to converse with the AI to reach their goals."""
+    output = json.loads(output)
+    # need to determine where does a new goal start
+    conversation_segments = []
+    start_index = 0
+    end_index = 3
+    while end_index < len(output):
+        user_follows_same_goal = call_llm(
+            [
+                {
+                    "role": "system",
+                    "content": "Look at the conversation and to determine if the user is still following the same goal "
+                               "or if they are following a new goal. If they are following the same goal, respond "
+                               "SAME_GOAL. Otherwise, respond NEW_GOAL. In any case do not answer the user request!"
+                }
+            ] + output[start_index:end_index],
+            model='gpt-4'
+        )
+
+        if user_follows_same_goal == "SAME_GOAL":
+            end_index += 2
+        else:
+            conversation_segments.append(output[start_index:end_index - 1])
+            start_index = end_index - 1
+            end_index += 2
+
+    if start_index < len(output):
+        conversation_segments.append(output[start_index:])
+
+    # for now assume that the user reached their goal in every segment
+    # so we can return the average amount of turns the user had to converse with the AI to reach their goals
+    return sum([2 / len(segment) for segment in conversation_segments]) / len(conversation_segments)
+
+
 def friendliness(inputs: Dict, output: str, target: str = None) -> float:
     response = call_llm(
         [
@@ -102,7 +138,7 @@ def helpful_the_second_time(messages: List[Dict[str, str]]) -> str:
         return unhelfpul_response
 
 
-@trace
+@trace(eval_funcs=[goal_success_ratio], access_output_of_func=lambda x: x[0])
 def unhelpful_chat():
     print("Welcome to the chat! Type 'exit' to end the session.")
 
@@ -125,15 +161,14 @@ def unhelpful_chat():
 def main():
     _ , trace_id = unhelpful_chat()
 
-    time.sleep(0.2)
-
+    if os.getenv("PAREA_API_KEY"):
+        print(f'You can view the logs at: https://optimusprompt.ai/logs/detailed/{trace_id}')
     if use_cache:
+        time.sleep(5)  # wait for local eval function to finish
         path_csv = f"trace_logs-{int(time.time())}.csv"
         trace_logs = cache.read_logs()
         write_trace_logs_to_csv(path_csv, trace_logs)
         print(f"CSV-file of results: {path_csv}")
-    if os.getenv("PAREA_API_KEY"):
-        print(f'You can view the logs at: https://optimusprompt.ai/logs/detailed/{trace_id}')
 
 
 if __name__ == "__main__":
diff --git a/parea/utils/trace_utils.py b/parea/utils/trace_utils.py
index c4108107..37035f03 100644
--- a/parea/utils/trace_utils.py
+++ b/parea/utils/trace_utils.py
@@ -180,10 +180,12 @@ def call_eval_funcs_then_log(trace_id: str, eval_funcs: list[Callable] = None, a
     try:
         inputs = data.inputs
         output = data.output
-        if access_output_of_func:
-            output = access_output_of_func(output)
         target = data.target
         if eval_funcs and data.status == "success":
+            if access_output_of_func:
+                output = json.loads(output)
+                output = access_output_of_func(output)
+                output = json.dumps(output)
             data.scores = []
             for func in eval_funcs:
                 try:

From 924f11fef896eeec6ef648b9dd372004f89a6049 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Tue, 17 Oct 2023 23:21:23 +0200
Subject: [PATCH 09/13] style

---
 parea/benchmark.py                            |  4 +-
 .../tracing_and_evaluating_openai_endpoint.py | 68 +++++++++----------
 .../tracing_with_open_ai_endpoint_directly.py |  3 +-
 parea/helpers.py                              |  5 +-
 parea/utils/trace_utils.py                    |  4 +-
 parea/wrapper/utils.py                        |  3 +-
 parea/wrapper/wrapper.py                      |  2 +-
 7 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/parea/benchmark.py b/parea/benchmark.py
index b90e4722..caea82a1 100644
--- a/parea/benchmark.py
+++ b/parea/benchmark.py
@@ -74,9 +74,7 @@ def run_benchmark(args):
             pass
         print(f"Done with {len(futures)} inputs")
 
-        redis_cache = RedisCache(
-            key_logs=redis_logs_key, host=args.redis_host, port=args.redis_port, password=args.redis_password
-        )
+        redis_cache = RedisCache(key_logs=redis_logs_key, host=args.redis_host, port=args.redis_port, password=args.redis_password)
 
         # write to csv
         path_csv = f"trace_logs-{int(time.time())}.csv"
diff --git a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
index 1bd7a2b0..8086b1fb 100644
--- a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
+++ b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
@@ -1,14 +1,15 @@
+from typing import Dict, List
+
 import json
 import os
 import time
-from typing import Dict, List
 
 import openai
 from dotenv import load_dotenv
 
-from parea import init, RedisCache
+from parea import RedisCache, init
 from parea.helpers import write_trace_logs_to_csv
-from parea.utils.trace_utils import trace, get_current_trace_id
+from parea.utils.trace_utils import get_current_trace_id, trace
 
 load_dotenv()
 
@@ -37,17 +38,18 @@ def goal_success_ratio(inputs: Dict, output: str, target: str = None) -> float:
                 {
                     "role": "system",
                     "content": "Look at the conversation and to determine if the user is still following the same goal "
-                               "or if they are following a new goal. If they are following the same goal, respond "
-                               "SAME_GOAL. Otherwise, respond NEW_GOAL. In any case do not answer the user request!"
+                    "or if they are following a new goal. If they are following the same goal, respond "
+                    "SAME_GOAL. Otherwise, respond NEW_GOAL. In any case do not answer the user request!",
                 }
-            ] + output[start_index:end_index],
-            model='gpt-4'
+            ]
+            + output[start_index:end_index],
+            model="gpt-4",
         )
 
         if user_follows_same_goal == "SAME_GOAL":
             end_index += 2
         else:
-            conversation_segments.append(output[start_index:end_index - 1])
+            conversation_segments.append(output[start_index : end_index - 1])
             start_index = end_index - 1
             end_index += 2
 
@@ -62,13 +64,10 @@ def goal_success_ratio(inputs: Dict, output: str, target: str = None) -> float:
 def friendliness(inputs: Dict, output: str, target: str = None) -> float:
     response = call_llm(
         [
-            {
-                "role": "system",
-                "content": "You evaluate the friendliness of the following response on a scale of 0 to 10. You must only return a number."
-            },
+            {"role": "system", "content": "You evaluate the friendliness of the following response on a scale of 0 to 10. You must only return a number."},
             {"role": "assistant", "content": output},
         ],
-        model='gpt-4'
+        model="gpt-4",
     )
     try:
         return float(response) / 10.0
@@ -77,16 +76,13 @@ def friendliness(inputs: Dict, output: str, target: str = None) -> float:
 
 
 def usefulness(inputs: Dict, output: str, target: str = None) -> float:
-    user_input = inputs['messages'][-1]["content"]
+    user_input = inputs["messages"][-1]["content"]
     response = call_llm(
         [
-            {
-                "role": "system",
-                "content": "You evaluate the usefulness of the response given the user input on a scale of 0 to 10. You must only return a number."
-            },
-            {"role": "assistant", "content": f'''User input: "{user_input}"\nAssistant response: "{output}"'''}
+            {"role": "system", "content": "You evaluate the usefulness of the response given the user input on a scale of 0 to 10. You must only return a number."},
+            {"role": "assistant", "content": f'''User input: "{user_input}"\nAssistant response: "{output}"'''},
         ],
-        model='gpt-4'
+        model="gpt-4",
     )
     try:
         return float(response) / 10.0
@@ -98,13 +94,10 @@ def usefulness(inputs: Dict, output: str, target: str = None) -> float:
 def helpful_the_second_time(messages: List[Dict[str, str]]) -> str:
     helpful_response = call_llm(
         [
-            {
-                "role": "system",
-                "content": "You are a friendly, and helpful assistant that helps people with their homework."
-            },
-
-        ] + messages,
-        model='gpt-4'
+            {"role": "system", "content": "You are a friendly, and helpful assistant that helps people with their homework."},
+        ]
+        + messages,
+        model="gpt-4",
     )
 
     has_user_asked_before_raw = call_llm(
@@ -112,10 +105,11 @@ def helpful_the_second_time(messages: List[Dict[str, str]]) -> str:
             {
                 "role": "system",
                 "content": "Assess if the user has asked the last question before or is asking again for more \
-information on a previous topic. If so, respond ASKED_BEFORE. Otherwise, respond NOT_ASKED_BEFORE."
+information on a previous topic. If so, respond ASKED_BEFORE. Otherwise, respond NOT_ASKED_BEFORE.",
             }
-        ] + messages,
-        model='gpt-4'
+        ]
+        + messages,
+        model="gpt-4",
     )
     has_user_asked_before = has_user_asked_before_raw == "ASKED_BEFORE"
 
@@ -129,10 +123,12 @@ def helpful_the_second_time(messages: List[Dict[str, str]]) -> str:
                     "role": "system",
                     "content": "Given the helpful response to the user input below, please provide a slightly unhelpful \
     response which makes the user ask again in case they didn't ask already again because of a previous unhelpful answer. \
-    In case the user asked again, please provide a last response"
+    In case the user asked again, please provide a last response",
                 },
-            ] + messages + [{"role": "assistant", "content": helpful_response}],
-            model='gpt-4'
+            ]
+            + messages
+            + [{"role": "assistant", "content": helpful_response}],
+            model="gpt-4",
         )
         messages.append({"role": "assistant", "content": unhelfpul_response})
         return unhelfpul_response
@@ -148,7 +144,7 @@ def unhelpful_chat():
     while True:
         user_input = input("\nYou: ")
 
-        if user_input.lower() == 'exit':
+        if user_input.lower() == "exit":
             print("Goodbye!")
             break
 
@@ -159,10 +155,10 @@ def unhelpful_chat():
 
 
 def main():
-    _ , trace_id = unhelpful_chat()
+    _, trace_id = unhelpful_chat()
 
     if os.getenv("PAREA_API_KEY"):
-        print(f'You can view the logs at: https://optimusprompt.ai/logs/detailed/{trace_id}')
+        print(f"You can view the logs at: https://optimusprompt.ai/logs/detailed/{trace_id}")
     if use_cache:
         time.sleep(5)  # wait for local eval function to finish
         path_csv = f"trace_logs-{int(time.time())}.csv"
diff --git a/parea/cookbook/tracing_with_open_ai_endpoint_directly.py b/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
index b245c1e7..14cd4656 100644
--- a/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
+++ b/parea/cookbook/tracing_with_open_ai_endpoint_directly.py
@@ -1,7 +1,8 @@
+from typing import Dict, Optional
+
 import os
 import random
 from datetime import datetime
-from typing import Dict, Optional
 
 import openai
 from dotenv import load_dotenv
diff --git a/parea/helpers.py b/parea/helpers.py
index b4391d61..e80c8e45 100644
--- a/parea/helpers.py
+++ b/parea/helpers.py
@@ -1,9 +1,8 @@
 import csv
 import time
 import uuid
-from typing import List
 
-from attr import fields_dict, asdict
+from attr import asdict, fields_dict
 
 from parea.schemas.models import TraceLog
 
@@ -21,7 +20,7 @@ def date_and_time_string_to_timestamp(date_and_time_string: str) -> float:
     return time.mktime(time.strptime(date_and_time_string, "%Y-%m-%d %H:%M:%S %Z"))
 
 
-def write_trace_logs_to_csv(path_csv: str, trace_logs: List[TraceLog]):
+def write_trace_logs_to_csv(path_csv: str, trace_logs: list[TraceLog]):
     with open(path_csv, "w", newline="") as file:
         # write header
         columns = fields_dict(TraceLog).keys()
diff --git a/parea/utils/trace_utils.py b/parea/utils/trace_utils.py
index 37035f03..c8080308 100644
--- a/parea/utils/trace_utils.py
+++ b/parea/utils/trace_utils.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional, Union, Callable
+from typing import Any, Callable, Optional, Union
 
 import contextvars
 import inspect
@@ -13,7 +13,7 @@
 
 from parea.helpers import gen_trace_id, to_date_and_time_string
 from parea.parea_logger import parea_logger
-from parea.schemas.models import CompletionResponse, TraceLog, NamedEvaluationScore
+from parea.schemas.models import CompletionResponse, NamedEvaluationScore, TraceLog
 
 logger = logging.getLogger()
 
diff --git a/parea/wrapper/utils.py b/parea/wrapper/utils.py
index c90e2940..50cc1ecf 100644
--- a/parea/wrapper/utils.py
+++ b/parea/wrapper/utils.py
@@ -1,13 +1,12 @@
 from typing import Callable
+
 import inspect
 from functools import wraps
 
 
 def skip_decorator_if_func_in_stack(func_to_check: Callable) -> Callable:
     def decorator_wrapper(decorator: Callable) -> Callable:
-
         def new_decorator(self, func: Callable) -> Callable:  # Include self
-
             @wraps(func)
             def wrapper(*args, **kwargs):
                 if any(func_to_check.__name__ in frame.function for frame in inspect.stack()):
diff --git a/parea/wrapper/wrapper.py b/parea/wrapper/wrapper.py
index bc1f35a9..81fc9e55 100644
--- a/parea/wrapper/wrapper.py
+++ b/parea/wrapper/wrapper.py
@@ -8,7 +8,7 @@
 from parea.cache.cache import Cache
 from parea.helpers import date_and_time_string_to_timestamp
 from parea.schemas.models import TraceLog
-from parea.utils.trace_utils import to_date_and_time_string, trace_context, trace_data, call_eval_funcs_then_log
+from parea.utils.trace_utils import call_eval_funcs_then_log, to_date_and_time_string, trace_context, trace_data
 from parea.wrapper.utils import skip_decorator_if_func_in_stack
 
 

From 1150e9b0c2e292f27e7d7d72b93eebb933eb2473 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Fri, 20 Oct 2023 10:18:29 +0200
Subject: [PATCH 10/13] feat: expose eval metric names via trace decorator

---
 parea/cookbook/tracing_and_evaluating_openai_endpoint.py | 7 +++++++
 parea/schemas/models.py                                  | 2 +-
 parea/utils/trace_utils.py                               | 3 +++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
index 8086b1fb..0b387a17 100644
--- a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
+++ b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
@@ -165,6 +165,13 @@ def main():
         trace_logs = cache.read_logs()
         write_trace_logs_to_csv(path_csv, trace_logs)
         print(f"CSV-file of results: {path_csv}")
+        parent_trace = None
+        for trace_log in trace_logs:
+            if trace_log.trace_id == trace_id:
+                parent_trace = trace_log
+                break
+        if parent_trace:
+            print(f'Overall score(s):\n{json.dumps(parent_trace.scores)}')
 
 
 if __name__ == "__main__":
diff --git a/parea/schemas/models.py b/parea/schemas/models.py
index 71f2b962..5cf5928f 100644
--- a/parea/schemas/models.py
+++ b/parea/schemas/models.py
@@ -132,7 +132,7 @@ class TraceLog:
     output_tokens: Optional[int] = 0
     total_tokens: Optional[int] = 0
     cost: Optional[float] = 0.0
-    evaluation_metric_ids: Optional[list[int]] = None
+    evaluation_metric_names: Optional[list[str]] = None
     scores: Optional[list[NamedEvaluationScore]] = None
     feedback_score: Optional[float] = None
 
diff --git a/parea/utils/trace_utils.py b/parea/utils/trace_utils.py
index c8080308..6efab992 100644
--- a/parea/utils/trace_utils.py
+++ b/parea/utils/trace_utils.py
@@ -55,6 +55,7 @@ def trace(
     metadata: Optional[dict[str, Any]] = None,
     target: Optional[str] = None,
     end_user_identifier: Optional[str] = None,
+    eval_funcs_names: Optional[list[str]] = None,
     eval_funcs: Optional[list[Callable]] = None,
     access_output_of_func: Optional[Callable] = None,
 ):
@@ -102,6 +103,7 @@ async def async_wrapper(*args, **kwargs):
                 output = make_output(result, output_as_list)
                 trace_data.get()[trace_id].output = output if isinstance(output, str) else json.dumps(output)
                 trace_data.get()[trace_id].status = "success"
+                trace_data.get()[trace_id].evaluation_metric_names = eval_funcs_names
             except Exception as e:
                 logger.exception(f"Error occurred in function {func.__name__}, {e}")
                 trace_data.get()[trace_id].error = str(e)
@@ -120,6 +122,7 @@ def wrapper(*args, **kwargs):
                 output = make_output(result, output_as_list)
                 trace_data.get()[trace_id].output = output if isinstance(output, str) else json.dumps(output)
                 trace_data.get()[trace_id].status = "success"
+                trace_data.get()[trace_id].evaluation_metric_names = eval_funcs_names
             except Exception as e:
                 logger.exception(f"Error occurred in function {func.__name__}, {e}")
                 trace_data.get()[trace_id].error = str(e)

From b98b2269647fa2fd0753a68427e4b94d8f180638 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Fri, 20 Oct 2023 11:44:21 +0200
Subject: [PATCH 11/13] feat: send transformed output for eval metric to
 backend

---
 parea/schemas/models.py    |  1 +
 parea/utils/trace_utils.py | 14 ++++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/parea/schemas/models.py b/parea/schemas/models.py
index 5cf5928f..c336b71c 100644
--- a/parea/schemas/models.py
+++ b/parea/schemas/models.py
@@ -132,6 +132,7 @@ class TraceLog:
     output_tokens: Optional[int] = 0
     total_tokens: Optional[int] = 0
     cost: Optional[float] = 0.0
+    output_for_eval_metrics: Optional[str] = None
     evaluation_metric_names: Optional[list[str]] = None
     scores: Optional[list[NamedEvaluationScore]] = None
     feedback_score: Optional[float] = None
diff --git a/parea/utils/trace_utils.py b/parea/utils/trace_utils.py
index 6efab992..d85ca4ca 100644
--- a/parea/utils/trace_utils.py
+++ b/parea/utils/trace_utils.py
@@ -182,17 +182,19 @@ def call_eval_funcs_then_log(trace_id: str, eval_funcs: list[Callable] = None, a
     data = trace_data.get()[trace_id]
     try:
         inputs = data.inputs
-        output = data.output
         target = data.target
+        if access_output_of_func:
+            output = json.loads(data.output)
+            output = access_output_of_func(output)
+            output_for_eval_metrics = json.dumps(output)
+        else:
+            output_for_eval_metrics = data.output
+        data.output_for_eval_metrics = output_for_eval_metrics
         if eval_funcs and data.status == "success":
-            if access_output_of_func:
-                output = json.loads(output)
-                output = access_output_of_func(output)
-                output = json.dumps(output)
             data.scores = []
             for func in eval_funcs:
                 try:
-                    score = func(inputs=inputs, output=output, target=target)
+                    score = func(inputs=inputs, output=output_for_eval_metrics, target=target)
                     data.scores.append(NamedEvaluationScore(name=func.__name__, score=score))
                 except Exception as e:
                     logger.exception(f"Error occurred calling evaluation function '{func.__name__}', {e}", exc_info=e)

From 089e26ba5b923a26707ba1e53bd56a8f78a2652f Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Fri, 20 Oct 2023 12:16:14 +0200
Subject: [PATCH 12/13] chore: bump version

---
 parea/cookbook/tracing_and_evaluating_openai_endpoint.py | 2 +-
 pyproject.toml                                           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
index 0b387a17..b1989f71 100644
--- a/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
+++ b/parea/cookbook/tracing_and_evaluating_openai_endpoint.py
@@ -171,7 +171,7 @@ def main():
                 parent_trace = trace_log
                 break
         if parent_trace:
-            print(f'Overall score(s):\n{json.dumps(parent_trace.scores)}')
+            print(f"Overall score(s):\n{json.dumps(parent_trace.scores)}")
 
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
index 3b6f2487..d4b3be8b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "parea-ai"
 packages = [{ include = "parea" }]
-version = "0.2.12"
+version = "0.2.13"
 description = "Parea python sdk"
 readme = "README.md"
 authors = ["joel-parea-ai <joel@parea.ai>"]

From 68944a41103edb0d90adc7bf4e98084c1ce57be0 Mon Sep 17 00:00:00 2001
From: Joschka Braun <joschka.braun@gmail.com>
Date: Fri, 20 Oct 2023 12:20:34 +0200
Subject: [PATCH 13/13] chore: bump version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a8a6817c..d4b3be8b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "parea-ai"
 packages = [{ include = "parea" }]
-version = "0.2.13a0"
+version = "0.2.13"
 description = "Parea python sdk"
 readme = "README.md"
 authors = ["joel-parea-ai <joel@parea.ai>"]